File size: 5,800 Bytes
6baed57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/dict.h>

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Test-only entry point for the parser's name-parsing routine, which is
 * static in its source module and therefore not directly callable from here.
 * Declared extern, so the definition must be supplied at link time
 * (presumably by the module under test -- confirm against the build setup).
 *
 * ctxt - parser context whose current input is consumed
 * attr - flag forwarded to the parser; the tests in this file pass 1 when
 *        parsing an attribute name and 0 otherwise
 *
 * Returns an xmlHashedString; the tests in this file read its `name` member.
 */
extern xmlHashedString test_htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr);

/*
 * Parse one name from `data` through test_htmlParseHTMLName() and verify the
 * outcome with Unity assertions.
 *
 * data             - input bytes handed to htmlCreateMemoryParserCtxt()
 * len              - number of bytes in `data`
 * attr             - forwarded unchanged as the `attr` argument of the parser
 * expected_name    - NUL-terminated string the returned name must equal
 * expected_advance - number of input bytes the parser is expected to consume
 * expected_stop    - byte expected under the input cursor after parsing
 *
 * The helper fails the test when the parser consumes the whole input, so
 * every caller must leave at least one stop byte after the name.
 */
static void assert_name_parse(const unsigned char *data, size_t len,
                              int attr,
                              const char *expected_name,
                              size_t expected_advance,
                              unsigned char expected_stop)
{
    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt((const char *)data, (int)len);
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "Failed to create HTML parser context");
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->dict, "Parser context dictionary is NULL");
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->input, "Parser input is NULL");
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->input->base, "Parser input base is NULL");

    /*
     * Remember the cursor as an offset from the buffer base instead of as a
     * raw pointer: the offset stays meaningful even if the parser touches the
     * input buffer, and it measures from where parsing actually begins.
     */
    const size_t offset_before = (size_t)(ctxt->input->cur - ctxt->input->base);

    xmlHashedString ret = test_htmlParseHTMLName(ctxt, attr);

    /* Verify the returned name */
    TEST_ASSERT_NOT_NULL_MESSAGE(ret.name, "Returned name is NULL (memory error?)");
    TEST_ASSERT_EQUAL_STRING_MESSAGE(expected_name, (const char *)ret.name,
                                     "Parsed name doesn't match expected");

    /* Verify how far the input cursor moved during the call */
    const size_t offset_after = (size_t)(ctxt->input->cur - ctxt->input->base);
    const size_t advanced = offset_after - offset_before;
    TEST_ASSERT_EQUAL_size_t_MESSAGE(expected_advance, advanced, "Input pointer advanced by unexpected amount");

    /* Ensure we didn't consume the stop character */
    if (ctxt->input->cur < ctxt->input->end) {
        TEST_ASSERT_EQUAL_UINT8_MESSAGE(expected_stop, (unsigned char)ctxt->input->cur[0],
                                        "Did not stop at expected stop character");
    } else {
        /* Every caller supplies a stop byte, so reaching the end is always a failure */
        TEST_FAIL_MESSAGE("Input advanced to end unexpectedly; expected a stop character to remain");
    }

    htmlFreeParserCtxt(ctxt);
}

/* Unity setup/teardown */
void setUp(void)
{
    /* Nothing to prepare: each test builds its own parser context. */
}
void tearDown(void)
{
    /* Nothing to release here: no per-test state is kept at file scope. */
}

/* Tests */

/*
 * Upper-case ASCII tag name followed by '>'.
 * Expects the name "div", 3 bytes consumed, and '>' left under the cursor.
 */
void test_htmlParseHTMLName_ascii_tag_lowercase_stop_on_gt(void)
{
    static const unsigned char input[] = "DIV>";
    const size_t input_len = sizeof(input) - 1; /* drop the literal's NUL */

    assert_name_parse(input, input_len, 0, "div", 3, '>');
}

/*
 * Attribute-mode parse (attr = 1) of an upper-case name followed by '='.
 * Expects the name "href", 4 bytes consumed, and '=' left under the cursor.
 */
void test_htmlParseHTMLName_attr_stop_on_equal(void)
{
    static const unsigned char input[] = "HREF=/path";
    const size_t input_len = sizeof(input) - 1; /* drop the literal's NUL */

    assert_name_parse(input, input_len, 1, "href", 4, '=');
}

/*
 * Non-attribute parse of a name followed by a space.
 * Expects the name "name", 4 bytes consumed, and ' ' left under the cursor.
 */
void test_htmlParseHTMLName_stop_on_space(void)
{
    static const unsigned char input[] = "name value";
    const size_t input_len = sizeof(input) - 1; /* drop the literal's NUL */

    assert_name_parse(input, input_len, 0, "name", 4, ' ');
}

/*
 * Name immediately followed by "/>", as in a self-closing tag.
 * Expects the name "br", 2 bytes consumed, and '/' left under the cursor.
 */
void test_htmlParseHTMLName_stop_on_slash(void)
{
    static const unsigned char input[] = "br/>";
    const size_t input_len = sizeof(input) - 1; /* drop the literal's NUL */

    assert_name_parse(input, input_len, 0, "br", 2, '/');
}

/*
 * Name containing a valid two-byte UTF-8 sequence (C3 9F, U+00DF).
 * Expects the multibyte sequence to come back unchanged while the ASCII
 * letters are lowercased: 7 bytes consumed, '>' left under the cursor.
 */
void test_htmlParseHTMLName_valid_utf8_preserved(void)
{
    /* Input bytes: 53 74 72 61 C3 9F 65 3E. The literal is split after the
     * escape so the following 'e' is not read as another hex digit. */
    static const unsigned char input[] = "Stra\xC3\x9F" "e>";
    /* Expected bytes: 73 74 72 61 C3 9F 65, NUL-terminated by the literal. */
    static const char want[] = "stra\xC3\x9F" "e";
    const size_t input_len = sizeof(input) - 1; /* drop the literal's NUL */

    assert_name_parse(input, input_len, 0, want, 7, '>');
}

/*
 * Two bytes that do not form valid UTF-8 (C0 AF), followed by '>'.
 * Expects one U+FFFD (EF BF BD) per invalid byte in the returned name,
 * 2 bytes consumed, and '>' left under the cursor.
 */
void test_htmlParseHTMLName_invalid_utf8_replacement(void)
{
    static const unsigned char input[] = "\xC0\xAF>";
    static const char want[] = "\xEF\xBF\xBD" "\xEF\xBF\xBD";
    const size_t input_len = sizeof(input) - 1; /* drop the literal's NUL */

    assert_name_parse(input, input_len, 0, want, 2, '>');
}

/*
 * Name with a NUL byte in the middle, terminated by a space.
 * Expects the NUL to appear as U+FFFD (EF BF BD) in the returned name:
 * "ab" + U+FFFD + "cd", 5 bytes consumed, ' ' left under the cursor.
 */
void test_htmlParseHTMLName_embedded_nul_replacement(void)
{
    /* Spelled out byte by byte so the embedded 0x00 is explicit. */
    static const unsigned char input[] = { 'a', 'b', 0x00, 'c', 'd', ' ' };
    /* Literal is split so "cd" is not swallowed by the preceding hex escape. */
    static const char want[] = "ab\xEF\xBF\xBD" "cd";
    const size_t input_len = sizeof(input);

    assert_name_parse(input, input_len, 0, want, 5, ' ');
}

/*
 * Runs every test in this file under Unity, then releases libxml2's global
 * state. The process exit status is the value returned by UNITY_END().
 */
int main(void)
{
    int failures;

    UNITY_BEGIN();

    /* Tag / attribute names made of plain ASCII */
    RUN_TEST(test_htmlParseHTMLName_ascii_tag_lowercase_stop_on_gt);
    RUN_TEST(test_htmlParseHTMLName_attr_stop_on_equal);
    RUN_TEST(test_htmlParseHTMLName_stop_on_space);
    RUN_TEST(test_htmlParseHTMLName_stop_on_slash);

    /* Multibyte, malformed and NUL-containing input */
    RUN_TEST(test_htmlParseHTMLName_valid_utf8_preserved);
    RUN_TEST(test_htmlParseHTMLName_invalid_utf8_replacement);
    RUN_TEST(test_htmlParseHTMLName_embedded_nul_replacement);

    failures = UNITY_END();

    /* Cleanup libxml2 global state before exiting */
    xmlCleanupParser();

    return failures;
}