/* File size: 5,800 Bytes; revision 6baed57 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/dict.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * Wrapper for the static function provided in the source module.
 *
 * The definition lives outside this file; only the prototype is declared here.
 * The tests in this file read the `name` member of the returned
 * xmlHashedString and compare it with the expected NUL-terminated name.
 *
 * ctxt - HTML parser context whose current input is parsed
 * attr - the tests pass 0 when parsing a tag name and 1 when parsing an
 *        attribute name (NOTE(review): exact semantics are defined by the
 *        wrapped function - confirm against the source module)
 */
extern xmlHashedString test_htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr);
/*
 * Run test_htmlParseHTMLName() over `data` in a fresh memory parser context
 * and verify the returned name, how far the input cursor moved, and the byte
 * left under the cursor.
 *
 * data             - raw input bytes handed to htmlCreateMemoryParserCtxt()
 * len              - number of bytes in `data` (narrowed to int for libxml2)
 * attr             - forwarded unchanged to test_htmlParseHTMLName()
 * expected_name    - NUL-terminated name the call is expected to return
 * expected_advance - number of bytes the input cursor is expected to move
 * expected_stop    - byte expected under the cursor after the call
 *
 * The helper fails the test when the whole input has been consumed, so every
 * caller must supply at least one trailing byte that is not part of the name.
 *
 * NOTE(review): if an assertion aborts the test early, the parser context is
 * presumably not released; acceptable for a short-lived test binary.
 */
static void assert_name_parse(const unsigned char *data, size_t len,
                              int attr,
                              const char *expected_name,
                              size_t expected_advance,
                              unsigned char expected_stop)
{
    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt((const char *)data, (int)len);
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "Failed to create HTML parser context");
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->dict, "Parser context dictionary is NULL");
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->input, "Parser input is NULL");
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->input->base, "Parser input base is NULL");

    /*
     * Remember the cursor position before parsing so that advancement is
     * measured from where parsing actually started, instead of assuming the
     * cursor begins at the buffer base.
     */
    const unsigned char *before = ctxt->input->cur;

    xmlHashedString ret = test_htmlParseHTMLName(ctxt, attr);

    /* Verify the returned name */
    TEST_ASSERT_NOT_NULL_MESSAGE(ret.name, "Returned name is NULL (memory error?)");
    TEST_ASSERT_EQUAL_STRING_MESSAGE(expected_name, (const char *)ret.name,
                                     "Parsed name doesn't match expected");

    /* Verify input advancement */
    size_t advanced = (size_t)(ctxt->input->cur - before);
    TEST_ASSERT_EQUAL_size_t_MESSAGE(expected_advance, advanced,
                                     "Input pointer advanced by unexpected amount");

    /* Ensure we didn't consume the stop character */
    if (advanced < len) {
        TEST_ASSERT_EQUAL_UINT8_MESSAGE(expected_stop, (unsigned char)ctxt->input->cur[0],
                                        "Did not stop at expected stop character");
    } else {
        /* Nothing is left to inspect: every test input must end with a stop byte */
        TEST_FAIL_MESSAGE("Input advanced to end unexpectedly; expected a stop character to remain");
    }

    htmlFreeParserCtxt(ctxt);
}
/* Unity setup/teardown */
/* Intentionally empty: each test creates its own parser context. */
void setUp(void)
{
}
/* Intentionally empty: the parsing helper frees its own parser context. */
void tearDown(void)
{
}
/* Tests */
/* Tag-name mode: upper-case ASCII is expected back in lower case, with '>' left unconsumed */
void test_htmlParseHTMLName_ascii_tag_lowercase_stop_on_gt(void)
{
    static const unsigned char input[] = "DIV>";

    /* "div" covers 3 bytes, so the cursor should rest on the '>' at offset 3 */
    assert_name_parse(input, sizeof input - 1, 0, "div", 3, '>');
}
/* Attribute-name mode: the name ends in front of '=' and comes back lower-cased */
void test_htmlParseHTMLName_attr_stop_on_equal(void)
{
    static const unsigned char input[] = "HREF=/path";

    /* "href" covers 4 bytes, so the cursor should rest on '=' */
    assert_name_parse(input, sizeof input - 1, 1, "href", 4, '=');
}
/* Tag-name mode: a space terminates the name; already lower-case input is returned as-is */
void test_htmlParseHTMLName_stop_on_space(void)
{
    static const unsigned char input[] = "name value";

    /* "name" covers 4 bytes, so the cursor should rest on the space */
    assert_name_parse(input, sizeof input - 1, 0, "name", 4, ' ');
}
/* Tag-name mode: '/' ends the name, as in the start of a "/>" sequence */
void test_htmlParseHTMLName_stop_on_slash(void)
{
    static const unsigned char input[] = "br/>";

    /* "br" covers 2 bytes, so the cursor should rest on '/' */
    assert_name_parse(input, sizeof input - 1, 0, "br", 2, '/');
}
/* A well-formed multibyte UTF-8 sequence is expected back unchanged; only ASCII letters are lower-cased */
void test_htmlParseHTMLName_valid_utf8_preserved(void)
{
    /* "Straße>" encoded as UTF-8 (no terminating NUL in the array) */
    static const unsigned char input[] = {
        0x53, 0x74, 0x72, 0x61, 0xC3, 0x9F, 0x65, 0x3E
    };
    /* "straße": the literal is split so the hex escape does not swallow the 'e' */
    static const char wanted[] = "stra\xC3\x9F" "e";

    /* The 7 bytes in front of '>' form the name; the cursor should rest on '>' */
    assert_name_parse(input, sizeof input, 0, wanted, 7, '>');
}
/* Each invalid UTF-8 byte is expected to come back as U+FFFD (EF BF BD) */
void test_htmlParseHTMLName_invalid_utf8_replacement(void)
{
    /* Two invalid bytes, then the '>' terminator */
    static const unsigned char input[] = { 0xC0, 0xAF, '>' };
    /* One replacement character per invalid byte: two in total */
    static const char wanted[] = "\xEF\xBF\xBD" "\xEF\xBF\xBD";

    /* Both invalid bytes are consumed (2 bytes); the cursor should rest on '>' */
    assert_name_parse(input, sizeof input, 0, wanted, 2, '>');
}
/* A NUL byte inside the name is expected to come back as the UTF-8 replacement character */
void test_htmlParseHTMLName_embedded_nul_replacement(void)
{
    /* 'a' 'b' NUL 'c' 'd' followed by the terminating space */
    static const unsigned char input[] = { 'a', 'b', 0x00, 'c', 'd', ' ' };
    /* "ab" + U+FFFD + "cd": the literal is split so "cd" is not read as hex digits */
    static const char wanted[] = "ab\xEF\xBF\xBD" "cd";

    /* The 5 bytes in front of the space form the name; the cursor should rest on ' ' */
    assert_name_parse(input, sizeof input, 0, wanted, 5, ' ');
}
/*
 * Test runner entry point: runs every test case in a fixed order and returns
 * the value produced by UNITY_END() as the process exit status.
 */
int main(void)
{
    UNITY_BEGIN();

    RUN_TEST(test_htmlParseHTMLName_ascii_tag_lowercase_stop_on_gt);
    RUN_TEST(test_htmlParseHTMLName_attr_stop_on_equal);
    RUN_TEST(test_htmlParseHTMLName_stop_on_space);
    RUN_TEST(test_htmlParseHTMLName_stop_on_slash);
    RUN_TEST(test_htmlParseHTMLName_valid_utf8_preserved);
    RUN_TEST(test_htmlParseHTMLName_invalid_utf8_replacement);
    RUN_TEST(test_htmlParseHTMLName_embedded_nul_replacement);

    /* Capture the result before cleanup so the exit status reflects the tests */
    int status = UNITY_END();

    /* Cleanup libxml2 global state */
    xmlCleanupParser();

    return status;
}