/*
 * libxml/tests/tests_HTMLparser_htmlParseLookupString.c
 *
 * Unity tests for htmlParseLookupString.
 * Uploaded by AryaWu using huggingface_hub (revision 6baed57, verified).
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <string.h>
#include <stdlib.h>
/*
 * Wrapper provided by the module for the static function
 * (htmlParseLookupString, going by this wrapper's name).
 *
 * The implementation is not visible in this file; the contract below is what
 * the tests in this file expect from it:
 *   - ctxt:       parser context whose ctxt->input buffer is searched and
 *                 whose ctxt->checkIndex is read and updated.
 *   - startDelta: offset at which the search starts when ctxt->checkIndex
 *                 is 0; a non-zero ctxt->checkIndex is used instead.
 *   - str/strLen: string to look for and its length.
 *   - extraLen:   number of bytes that must follow the start of the match
 *                 (beyond its first byte) for the match to be accepted.
 *
 * Expected result: the offset of the match (counted from the start of the
 * buffer in these tests) with ctxt->checkIndex reset to 0, or -1 with
 * ctxt->checkIndex set to the offset from which a later call resumes.
 */
extern int test_htmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
const char *str, size_t strLen, size_t extraLen);
/*
 * Build a parser context over the NUL-terminated string `buf` by handing the
 * string and its strlen() to xmlCreateMemoryParserCtxt().
 *
 * Returns whatever xmlCreateMemoryParserCtxt() returns; callers in this file
 * check the result for NULL and release it with xmlFreeParserCtxt().
 */
static xmlParserCtxtPtr make_ctxt(const char *buf)
{
    /* The in-memory constructor is used so the input's base/cur/end are set up for us. */
    return xmlCreateMemoryParserCtxt(buf, (int)strlen(buf));
}
/* Unity fixture hook: calls xmlInitParser() ahead of each test case. */
void setUp(void)
{
    xmlInitParser();
}
/*
 * Unity fixture hook: calls xmlCleanupParser() once each test case is done.
 *
 * NOTE(review): libxml2's documentation describes xmlCleanupParser() as a
 * library-wide cleanup meant to run right before the process exits; confirm
 * that running it between tests (with setUp re-initialising) is intended.
 */
void tearDown(void)
{
    xmlCleanupParser();
}
/*
 * Test: successful match with checkIndex == 0; startDelta is honored.
 *
 * "abc" sits at offset 2 of "zzabcYYY" and the search starts at offset 2 with
 * extraLen 0, so the lookup is expected to return 2 and leave
 * ctxt->checkIndex at 0.
 *
 * The observed values are copied into locals and the context is freed before
 * any of them is asserted: a failing Unity assertion abandons the rest of the
 * test body, so asserting while the context is still alive would leak it.
 */
void test_htmlParseLookupString_basic_found(void) {
    const char *buf = "zzabcYYY";
    xmlParserCtxtPtr ctxt = make_ctxt(buf);
    TEST_ASSERT_NOT_NULL(ctxt);

    /* Release the context ourselves before reporting a missing ctxt->input. */
    const void *input = ctxt->input;
    if (input == NULL) {
        xmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(input);

    int ret = test_htmlParseLookupString(ctxt, 2, "abc", 3, 0);
    long check_index = ctxt->checkIndex;
    xmlFreeParserCtxt(ctxt);

    TEST_ASSERT_EQUAL_INT(2, ret);
    TEST_ASSERT_EQUAL_INT(0, check_index);
}
/*
 * Test: no match at all; verify the -1 return and the checkIndex computed by
 * the rescan logic.
 *
 * Expected arithmetic: strLen=3, extraLen=2 => rescan=4; the buffer holds 6
 * bytes, so the resume index is (end - rescan) - base = 2.
 *
 * The observed values are copied into locals and the context is freed before
 * any of them is asserted: a failing Unity assertion abandons the rest of the
 * test body, so asserting while the context is still alive would leak it.
 */
void test_htmlParseLookupString_not_found_sets_checkIndex_rescan(void) {
    const char *buf = "abcdef"; /* len = 6 */
    xmlParserCtxtPtr ctxt = make_ctxt(buf);
    TEST_ASSERT_NOT_NULL(ctxt);

    /* Release the context ourselves before reporting a missing ctxt->input. */
    const void *input = ctxt->input;
    if (input == NULL) {
        xmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(input);

    int ret = test_htmlParseLookupString(ctxt, 0, "xyz", 3, 2);
    long check_index = ctxt->checkIndex;
    xmlFreeParserCtxt(ctxt);

    TEST_ASSERT_EQUAL_INT(-1, ret);
    TEST_ASSERT_EQUAL_INT(2, check_index);
}
/*
 * Test: the search string is present, but too close to the end of the buffer
 * for the extraLen constraint, so the "not found" path is expected.
 *
 * Expected arithmetic: "XY" starts at offset 2 of the 4-byte buffer, leaving
 * end - term = 2, while extraLen=2 requires at least 3. On the not-found
 * path rescan = 2 + 2 - 1 = 3 and 4 > 3, so the resume index is
 * (end - rescan) - base = 1.
 *
 * The observed values are copied into locals and the context is freed before
 * any of them is asserted: a failing Unity assertion abandons the rest of the
 * test body, so asserting while the context is still alive would leak it.
 */
void test_htmlParseLookupString_found_but_too_close_to_end_treated_as_not_found(void) {
    const char *buf = "abXY"; /* len = 4; term "XY" at pos 2; end - term = 2 */
    xmlParserCtxtPtr ctxt = make_ctxt(buf);
    TEST_ASSERT_NOT_NULL(ctxt);

    /* Release the context ourselves before reporting a missing ctxt->input. */
    const void *input = ctxt->input;
    if (input == NULL) {
        xmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(input);

    int ret = test_htmlParseLookupString(ctxt, 0, "XY", 2, 2);
    long check_index = ctxt->checkIndex;
    xmlFreeParserCtxt(ctxt);

    TEST_ASSERT_EQUAL_INT(-1, ret);
    TEST_ASSERT_EQUAL_INT(1, check_index);
}
/*
 * Test: when checkIndex != 0 the lookup uses it instead of startDelta, and a
 * successful lookup resets checkIndex to 0.
 *
 * The context is freed before any assertion that can fail is evaluated: a
 * failing Unity assertion abandons the rest of the test body, so asserting
 * while the context is still alive would leak it.
 */
void test_htmlParseLookupString_uses_checkIndex_over_startDelta_and_resets_on_success(void) {
    const char *buf = "1234567abc"; /* len = 10; "abc" at pos 7 */
    xmlParserCtxtPtr ctxt = make_ctxt(buf);
    TEST_ASSERT_NOT_NULL(ctxt);

    /* Release the context ourselves before reporting a missing ctxt->input. */
    const void *input = ctxt->input;
    if (input == NULL) {
        xmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(input);

    /* First call: force "not found" through the extraLen constraint.
       extraLen=4: (end - term)=3 < 5 -> not found; rescan=3+4-1=6;
       10 > 6 -> expected checkIndex = 4. */
    int ret1 = test_htmlParseLookupString(ctxt, 0, "abc", 3, 4);
    long check_index1 = ctxt->checkIndex;
    if (ret1 != -1 || check_index1 != 4) {
        /* The second call's precondition does not hold: free the context
           first, then let the assertions report which value was wrong. */
        xmlFreeParserCtxt(ctxt);
        TEST_ASSERT_EQUAL_INT(-1, ret1);
        TEST_ASSERT_EQUAL_INT(4, check_index1);
        /* Never continue past this point with a freed context. */
        TEST_FAIL_MESSAGE("first lookup did not leave the expected state");
    }

    /* Second call: startDelta is misleading (8), but checkIndex=4 dictates
       the start point. With extraLen=0 "abc" should now be found at pos 7
       and checkIndex reset to 0. */
    int ret2 = test_htmlParseLookupString(ctxt, 8, "abc", 3, 0);
    long check_index2 = ctxt->checkIndex;
    xmlFreeParserCtxt(ctxt);

    TEST_ASSERT_EQUAL_INT(7, ret2);
    TEST_ASSERT_EQUAL_INT(0, check_index2);
}
/*
 * Test: when the remaining input is no longer than the rescan window, the
 * lookup is expected to return -1 and leave checkIndex at 0.
 *
 * Expected arithmetic: strLen=3, extraLen=3 => rescan=5; the buffer holds 5
 * bytes, and 5 <= rescan forces the resume index to 0.
 *
 * The observed values are copied into locals and the context is freed before
 * any of them is asserted: a failing Unity assertion abandons the rest of the
 * test body, so asserting while the context is still alive would leak it.
 */
void test_htmlParseLookupString_rescan_ge_remaining_forces_zero_index(void) {
    const char *buf = "abcde"; /* len = 5 */
    xmlParserCtxtPtr ctxt = make_ctxt(buf);
    TEST_ASSERT_NOT_NULL(ctxt);

    /* Release the context ourselves before reporting a missing ctxt->input. */
    const void *input = ctxt->input;
    if (input == NULL) {
        xmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(input);

    int ret = test_htmlParseLookupString(ctxt, 0, "nop", 3, 3);
    long check_index = ctxt->checkIndex;
    xmlFreeParserCtxt(ctxt);

    TEST_ASSERT_EQUAL_INT(-1, ret);
    TEST_ASSERT_EQUAL_INT(0, check_index);
}
/*
 * Test runner entry point: runs every test case of this file under Unity, in
 * declaration order, and uses the value of UNITY_END() as the exit status.
 */
int main(void)
{
    UNITY_BEGIN();

    RUN_TEST(test_htmlParseLookupString_basic_found);
    RUN_TEST(test_htmlParseLookupString_not_found_sets_checkIndex_rescan);
    RUN_TEST(test_htmlParseLookupString_found_but_too_close_to_end_treated_as_not_found);
    RUN_TEST(test_htmlParseLookupString_uses_checkIndex_over_startDelta_and_resets_on_success);
    RUN_TEST(test_htmlParseLookupString_rescan_ge_remaining_forces_zero_index);

    const int status = UNITY_END();
    return status;
}