/*
 * Unit tests for htmlUTF8ToHtml(), written against the Unity test framework.
 *
 * The tests below expect htmlUTF8ToHtml(out, &outlen, in, &inlen) to:
 *   - return the number of bytes written on success (equal to the updated
 *     *outlen), or a negative value on error;
 *   - update *outlen to the number of bytes produced and *inlen to the number
 *     of input bytes consumed.
 */
#include "unity/unity.h"

/*
 * NOTE(review): the header names on the original #include lines were lost
 * (only bare "#include" tokens remained). The list below is reconstructed
 * from the identifiers this file uses -- confirm against the project build.
 */
#include <libxml/HTMLparser.h> /* htmlUTF8ToHtml -- presumably declared here; verify */
#include <stdbool.h>           /* bool */
#include <stddef.h>            /* size_t */
#include <stdio.h>             /* snprintf */
#include <string.h>            /* strlen, memcmp, memcpy */

/*
 * Helper: assert that the raw (not NUL-terminated) output equals an expected
 * C string, comparing both length and bytes.
 */
static void assert_output_equals(const unsigned char *out, int outlen,
                                 const char *expected)
{
    size_t exp_len = strlen(expected);

    TEST_ASSERT_EQUAL_INT((int)exp_len, outlen);
    TEST_ASSERT_EQUAL_UINT8_ARRAY(expected, out, exp_len);
}

/*
 * Helper: assert that the raw output equals one of two expected C strings.
 * Used where the converter may legitimately emit either a named or a numeric
 * entity for the same code point. On mismatch the test fails with a message
 * that includes a printable (truncated) copy of the actual output.
 */
static void assert_output_equals_either(const unsigned char *out, int outlen,
                                        const char *exp1, const char *exp2)
{
    size_t len1 = strlen(exp1);
    size_t len2 = strlen(exp2);
    bool match1 = (outlen == (int)len1) && (memcmp(out, exp1, len1) == 0);
    bool match2 = (outlen == (int)len2) && (memcmp(out, exp2, len2) == 0);

    if (!(match1 || match2)) {
        char buf[64];
        char msg[192];
        int n = outlen < (int)sizeof(buf) - 1 ? outlen : (int)sizeof(buf) - 1;

        /* A negative outlen must not reach memcpy as a huge size_t. */
        if (n < 0) {
            n = 0;
        }
        memcpy(buf, out, (size_t)n);
        buf[n] = '\0';

        snprintf(msg, sizeof(msg),
                 "Output \"%s\" (%d bytes) did not match either expected "
                 "entity representation (\"%s\" or \"%s\")",
                 buf, outlen, exp1, exp2);
        TEST_FAIL_MESSAGE(msg);
    }
}

void setUp(void)
{
    /* Setup code here, or leave empty */
}

void tearDown(void)
{
    /* Cleanup code here, or leave empty */
}

/* Test: in == NULL triggers initialization: ret == 0, outlen and inlen set to 0 */
void test_htmlUTF8ToHtml_initialization_with_null_in(void)
{
    unsigned char outbuf[16] = {0};
    int outLen = (int)sizeof(outbuf);
    int inLen = 123; /* arbitrary non-zero */

    int ret = htmlUTF8ToHtml(outbuf, &outLen, NULL, &inLen);

    TEST_ASSERT_EQUAL_INT(0, ret);
    TEST_ASSERT_EQUAL_INT(0, outLen);
    TEST_ASSERT_EQUAL_INT(0, inLen);
}

/* Test: out == NULL should return an error (<0) without crashing */
void test_htmlUTF8ToHtml_null_out_pointer_returns_error(void)
{
    const unsigned char inbuf[] = { 'A' };
    int outLen = 16;
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(NULL, &outLen, inbuf, &inLen);

    TEST_ASSERT_LESS_THAN_INT(0, ret);
}

/* Test: ASCII passthrough */
void test_htmlUTF8ToHtml_ascii_passthrough(void)
{
    const unsigned char inbuf[] = "Hello";
    unsigned char outbuf[64];
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)strlen((const char *)inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    TEST_ASSERT_EQUAL_INT(5, ret);
    TEST_ASSERT_EQUAL_INT(5, outLen);
    TEST_ASSERT_EQUAL_INT(5, inLen);
    /* Output is not NUL-terminated; compare by length and bytes. */
    assert_output_equals(outbuf, outLen, "Hello");
}

/* Test: basic 2-byte UTF-8 -> entity (&copy; or &#169;) */
void test_htmlUTF8ToHtml_two_byte_utf8_to_entity_copy(void)
{
    const unsigned char inbuf[] = { 0xC2, 0xA9 }; /* U+00A9 COPYRIGHT SIGN */
    unsigned char outbuf[32] = {0};
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    TEST_ASSERT_EQUAL_INT(outLen, ret);
    TEST_ASSERT_EQUAL_INT(2, inLen);
    /* Accept either named or numeric */
    assert_output_equals_either(outbuf, outLen, "&copy;", "&#169;");
}

/* Test: mixed ASCII and 2-byte UTF-8 -> entity sequence */
void test_htmlUTF8ToHtml_mixed_ascii_and_entity(void)
{
    const unsigned char inbuf[] = { 'A', 0xC2, 0xA9, 'B' }; /* 'A', U+00A9, 'B' */
    unsigned char outbuf[64] = {0};
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    TEST_ASSERT_EQUAL_INT(outLen, ret);
    TEST_ASSERT_EQUAL_INT(4, inLen);

    /* Expect "A&copy;B" or "A&#169;B" */
    const char *exp1 = "A&copy;B";
    const char *exp2 = "A&#169;B";
    assert_output_equals_either(outbuf, outLen, exp1, exp2);
}

/* Test: 4-byte UTF-8 (U+1F600 GRINNING FACE) -> numeric entity */
void test_htmlUTF8ToHtml_four_byte_utf8_to_numeric_entity(void)
{
    const unsigned char inbuf[] = { 0xF0, 0x9F, 0x98, 0x80 }; /* U+1F600 */
    unsigned char outbuf[32] = {0};
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    TEST_ASSERT_EQUAL_INT(outLen, ret);
    TEST_ASSERT_EQUAL_INT(4, inLen);
    /* 0x1F600 == 128512 decimal */
    assert_output_equals(outbuf, outLen, "&#128512;");
}

/* Test: incomplete multibyte sequence is not consumed and no error; prior ASCII remains */
void test_htmlUTF8ToHtml_incomplete_multibyte_sequence(void)
{
    /* 'A' followed by first byte of 2-byte sequence */
    const unsigned char inbuf[] = { 'A', 0xC2 };
    unsigned char outbuf[16] = {0};
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    /* Should process 'A' and stop before incomplete sequence */
    TEST_ASSERT_EQUAL_INT(1, ret);
    TEST_ASSERT_EQUAL_INT(1, outLen);
    TEST_ASSERT_EQUAL_INT(1, inLen);
    TEST_ASSERT_EQUAL_HEX8('A', outbuf[0]);
}

/* Test: output buffer too small for entity -> error, nothing consumed */
void test_htmlUTF8ToHtml_outbuf_too_small_for_entity(void)
{
    /* U+00A9 -> requires 6 bytes (&copy; or &#169;) */
    const unsigned char inbuf[] = { 0xC2, 0xA9 };
    unsigned char outbuf[5] = {0}; /* deliberately too small */
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    TEST_ASSERT_LESS_THAN_INT(0, ret); /* space error */
    TEST_ASSERT_EQUAL_INT(0, outLen);
    TEST_ASSERT_EQUAL_INT(0, inLen);
}

/* Test: out buffer too small mid-ASCII -> error, partial consumption accounted */
void test_htmlUTF8ToHtml_outbuf_too_small_mid_ascii(void)
{
    const unsigned char inbuf[] = { 'A', 'B', 'C' };
    unsigned char outbuf[2] = {0};
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    TEST_ASSERT_LESS_THAN_INT(0, ret); /* space error */
    TEST_ASSERT_EQUAL_INT(2, outLen);
    TEST_ASSERT_EQUAL_INT(2, inLen);
    TEST_ASSERT_EQUAL_HEX8('A', outbuf[0]);
    TEST_ASSERT_EQUAL_HEX8('B', outbuf[1]);
}

/* Test: exact buffer boundary for entity succeeds */
void test_htmlUTF8ToHtml_exact_boundary_for_entity(void)
{
    const unsigned char inbuf[] = { 0xC2, 0xA9 }; /* U+00A9 */
    unsigned char outbuf[6] = {0}; /* exactly enough for &copy; or &#169; */
    int outLen = (int)sizeof(outbuf);
    int inLen = (int)sizeof(inbuf);

    int ret = htmlUTF8ToHtml(outbuf, &outLen, inbuf, &inLen);

    TEST_ASSERT_EQUAL_INT(6, ret);
    TEST_ASSERT_EQUAL_INT(6, outLen);
    TEST_ASSERT_EQUAL_INT(2, inLen);
    assert_output_equals_either(outbuf, outLen, "&copy;", "&#169;");
}

/* Unity test runner entry point: runs every test case defined above. */
int main(void)
{
    UNITY_BEGIN();

    RUN_TEST(test_htmlUTF8ToHtml_initialization_with_null_in);
    RUN_TEST(test_htmlUTF8ToHtml_null_out_pointer_returns_error);
    RUN_TEST(test_htmlUTF8ToHtml_ascii_passthrough);
    RUN_TEST(test_htmlUTF8ToHtml_two_byte_utf8_to_entity_copy);
    RUN_TEST(test_htmlUTF8ToHtml_mixed_ascii_and_entity);
    RUN_TEST(test_htmlUTF8ToHtml_four_byte_utf8_to_numeric_entity);
    RUN_TEST(test_htmlUTF8ToHtml_incomplete_multibyte_sequence);
    RUN_TEST(test_htmlUTF8ToHtml_outbuf_too_small_for_entity);
    RUN_TEST(test_htmlUTF8ToHtml_outbuf_too_small_mid_ascii);
    RUN_TEST(test_htmlUTF8ToHtml_exact_boundary_for_entity);

    return UNITY_END();
}