/*
 * libxml/tests/tests_HTMLtree_htmlDocDump.c
 *
 * Unity tests for htmlDocDump().
 * Provenance: uploaded by AryaWu using huggingface_hub (revision 6baed57, verified).
 */
#include "unity/unity.h"
#include <libxml/HTMLtree.h>
#include <libxml/tree.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Unity required functions */
/* setUp: fixture hook required by Unity. Intentionally empty: every test in
 * this file builds its own document and temporary file. */
void setUp(void) {
/* Setup code here, or leave empty */
}
/* tearDown: fixture hook required by Unity. Intentionally empty: every test in
 * this file releases its own resources before returning. */
void tearDown(void) {
/* Cleanup code here, or leave empty */
}
/* Helper: create a temporary file path and open it for writing (binary).
 *
 *   outPath      caller-provided buffer that receives the NUL-terminated path
 *   outPathSize  capacity of outPath in bytes, terminator included
 *   outF         receives the opened stream on success, NULL on failure
 *
 * Returns 1 on success, 0 on failure (invalid arguments, no temporary name
 * available, outPath too small to hold the complete name, or fopen failure).
 * On success the caller owns both the stream and the file on disk. */
static int create_temp_file(char *outPath, size_t outPathSize, FILE **outF) {
    if (outPath == NULL || outF == NULL || outPathSize == 0)
        return 0;

    /* Never leave a stale stream pointer behind on a failure path. */
    *outF = NULL;

    /* Use tmpnam to get a unique path (adequate for test purposes). */
    char tmpbuf[L_tmpnam];
#if defined(_MSC_VER)
    if (tmpnam_s(tmpbuf, L_tmpnam) != 0)
        return 0;
#else
    if (tmpnam(tmpbuf) == NULL)
        return 0;
#endif

    /* Refuse to truncate: a shortened name is a *different* path, which could
     * clobber an unrelated file and would later be removed by the caller. */
    size_t nameLen = strlen(tmpbuf);
    if (nameLen >= outPathSize)
        return 0;
    memcpy(outPath, tmpbuf, nameLen + 1);

    FILE *f = fopen(outPath, "wb");
    if (f == NULL)
        return 0;

    *outF = f;
    return 1;
}
/* Helper: load the whole file at `path` into a freshly malloc'ed buffer.
 *
 * The buffer is always NUL-terminated and must be released with free().
 * When `length` is non-NULL it receives the number of bytes actually read;
 * it is left untouched on failure.
 *
 * Returns NULL when `path` is NULL, the file cannot be opened, its size cannot
 * be determined, or the allocation fails. */
static char *read_file_to_string(const char *path, long *length) {
    FILE *in = (path != NULL) ? fopen(path, "rb") : NULL;
    if (in == NULL)
        return NULL;

    /* Determine the size by seeking to the end; -1 marks "unknown". */
    long total = -1;
    if (fseek(in, 0, SEEK_END) == 0)
        total = ftell(in);

    char *data = NULL;
    if (total >= 0) {
        rewind(in);
        /* One extra byte for the terminator. */
        data = (char *)malloc((size_t)total + 1);
    }

    if (data != NULL) {
        size_t got = fread(data, 1, (size_t)total, in);
        /* Terminate after what was really read, which may be short. */
        data[got] = '\0';
        if (length != NULL)
            *length = (long)got;
    }

    fclose(in);
    return data;
}
/* Helper: build a simple HTML document with <html><body><p>Hello</p></body></html>.
 *
 * Returns the new document, which the caller releases with xmlFreeDoc(), or
 * NULL if any node could not be created or linked. On failure everything
 * built so far is freed. */
static xmlDocPtr build_simple_html_doc(void) {
    xmlDocPtr doc = htmlNewDocNoDtD(NULL, NULL);
    if (doc == NULL)
        return NULL;

    /* Elements are created with xmlNewDocNode, declared in <libxml/tree.h>. */
    xmlNodePtr root = xmlNewDocNode(doc, NULL, BAD_CAST "html", NULL);
    if (root == NULL) {
        xmlFreeDoc(doc);
        return NULL;
    }
    xmlDocSetRootElement(doc, root);

    /* From here on: a node that failed to link is not reachable from the
     * document, so it must be freed on its own before freeing the document. */
    xmlNodePtr body = xmlNewDocNode(doc, NULL, BAD_CAST "body", NULL);
    if (body == NULL) {
        xmlFreeDoc(doc);
        return NULL;
    }
    if (xmlAddChild(root, body) == NULL) {
        xmlFreeNode(body);
        xmlFreeDoc(doc);
        return NULL;
    }

    xmlNodePtr p = xmlNewDocNode(doc, NULL, BAD_CAST "p", NULL);
    if (p == NULL) {
        xmlFreeDoc(doc);
        return NULL;
    }
    if (xmlAddChild(body, p) == NULL) {
        xmlFreeNode(p);
        xmlFreeDoc(doc);
        return NULL;
    }

    xmlNodePtr text = xmlNewText(BAD_CAST "Hello");
    if (text == NULL) {
        xmlFreeDoc(doc);
        return NULL;
    }
    if (xmlAddChild(p, text) == NULL) {
        xmlFreeNode(text);
        xmlFreeDoc(doc);
        return NULL;
    }

    return doc;
}
/* Test: a NULL document makes htmlDocDump return -1, leaves the caller's
 * FILE* usable, and writes nothing to it. */
static void test_htmlDocDump_null_doc_returns_minus1(void) {
    FILE *out = NULL;
    char tmp_path[256];
    const int created = create_temp_file(tmp_path, sizeof(tmp_path), &out);
    TEST_ASSERT_TRUE_MESSAGE(created, "Failed to create temp file");

    const int rc = htmlDocDump(out, NULL);
    TEST_ASSERT_EQUAL_INT_MESSAGE(-1, rc, "htmlDocDump should return -1 for NULL doc");

    /* The stream is still ours after the early return: flush and close it. */
    const int flush_rc = fflush(out);
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, flush_rc, "Flushing file failed");
    const int close_rc = fclose(out);
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, close_rc, "Closing file failed");

    /* Nothing may have reached the file. */
    long nbytes = 0;
    char *data = read_file_to_string(tmp_path, &nbytes);
    TEST_ASSERT_NOT_NULL_MESSAGE(data, "Failed to read temp file");
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, (int)nbytes, "File should be empty when doc is NULL");

    free(data);
    remove(tmp_path);
}
/* Test: a NULL FILE* makes htmlDocDump return -1 even for a valid document. */
static void test_htmlDocDump_null_file_returns_minus1(void) {
    xmlDocPtr html = build_simple_html_doc();
    TEST_ASSERT_NOT_NULL_MESSAGE(html, "Failed to build HTML doc");

    const int rc = htmlDocDump(NULL, html);
    TEST_ASSERT_EQUAL_INT_MESSAGE(-1, rc, "htmlDocDump should return -1 for NULL file");

    xmlFreeDoc(html);
}
/* Test: a document whose encoding name is not recognised makes htmlDocDump
 * return -1 and leaves the output file empty. */
static void test_htmlDocDump_invalid_encoding_returns_minus1_and_no_write(void) {
    xmlDocPtr html = build_simple_html_doc();
    TEST_ASSERT_NOT_NULL_MESSAGE(html, "Failed to build HTML doc");

    /* Pick an encoding name that is very unlikely to be supported. */
    html->encoding = xmlStrdup(BAD_CAST "x-unknown-encoding-xyz-1234");

    FILE *out = NULL;
    char tmp_path[256];
    const int created = create_temp_file(tmp_path, sizeof(tmp_path), &out);
    TEST_ASSERT_TRUE_MESSAGE(created, "Failed to create temp file");

    const int rc = htmlDocDump(out, html);
    TEST_ASSERT_EQUAL_INT_MESSAGE(-1, rc, "htmlDocDump should return -1 for unsupported encoding");

    /* The dump is expected to bail out before touching the stream, so the
     * FILE* is still open and must be flushed and closed here. */
    const int flush_rc = fflush(out);
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, flush_rc, "Flushing file failed");
    const int close_rc = fclose(out);
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, close_rc, "Closing file failed");

    long nbytes = -1;
    char *data = read_file_to_string(tmp_path, &nbytes);
    TEST_ASSERT_NOT_NULL_MESSAGE(data, "Failed to read temp file");
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, (int)nbytes, "File should be empty on encoder failure");

    free(data);
    remove(tmp_path);
    xmlFreeDoc(html); /* also releases html->encoding */
}
/* Test: Successful dump with default (NULL) encoding should write content and return >= 0. */
static void test_htmlDocDump_writes_content_with_default_encoding(void) {
    xmlDocPtr doc = build_simple_html_doc();
    TEST_ASSERT_NOT_NULL_MESSAGE(doc, "Failed to build HTML doc");

    /* Ensure default (NULL) encoding to trigger fallback to "HTML" */
    doc->encoding = NULL;

    char path[256];
    FILE *f = NULL;
    TEST_ASSERT_TRUE_MESSAGE(create_temp_file(path, sizeof(path), &f), "Failed to create temp file");

    int ret = htmlDocDump(f, doc);
    /* The FILE* was opened by this test and stays owned by it: close it here,
     * before any assertion can bail out, so the stream is never leaked and all
     * output is on disk before the file is read back and removed. */
    int closeRet = fclose(f);
    f = NULL;

    TEST_ASSERT_MESSAGE(ret >= 0, "htmlDocDump should succeed (ret >= 0) with default encoding");
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, closeRet, "Closing file failed");

    long size = -1;
    char *contents = read_file_to_string(path, &size);
    TEST_ASSERT_NOT_NULL_MESSAGE(contents, "Failed to read dumped file");
    TEST_ASSERT_MESSAGE(size > 0, "Dumped file should contain data");
    TEST_ASSERT_NOT_NULL_MESSAGE(strstr(contents, "Hello"), "Dumped HTML should contain text 'Hello'");
    /* Also check that <html> tag appears */
    TEST_ASSERT_NOT_NULL_MESSAGE(strstr(contents, "<html"), "Dumped HTML should contain <html> element");

    free(contents);
    remove(path);
    xmlFreeDoc(doc);
}
/* Test: Successful dump with explicit UTF-8 encoding should write content and return >= 0. */
static void test_htmlDocDump_writes_content_with_utf8_encoding(void) {
    xmlDocPtr doc = build_simple_html_doc();
    TEST_ASSERT_NOT_NULL_MESSAGE(doc, "Failed to build HTML doc");

    /* Set UTF-8 encoding explicitly */
    doc->encoding = xmlStrdup(BAD_CAST "UTF-8");

    char path[256];
    FILE *f = NULL;
    TEST_ASSERT_TRUE_MESSAGE(create_temp_file(path, sizeof(path), &f), "Failed to create temp file");

    int ret = htmlDocDump(f, doc);
    /* The FILE* was opened by this test and stays owned by it: close it here,
     * before any assertion can bail out, so the stream is never leaked and all
     * output is on disk before the file is read back and removed. */
    int closeRet = fclose(f);
    f = NULL;

    TEST_ASSERT_MESSAGE(ret >= 0, "htmlDocDump should succeed (ret >= 0) with UTF-8 encoding");
    TEST_ASSERT_EQUAL_INT_MESSAGE(0, closeRet, "Closing file failed");

    long size = -1;
    char *contents = read_file_to_string(path, &size);
    TEST_ASSERT_NOT_NULL_MESSAGE(contents, "Failed to read dumped file");
    TEST_ASSERT_MESSAGE(size > 0, "Dumped file should contain data");
    TEST_ASSERT_NOT_NULL_MESSAGE(strstr(contents, "Hello"), "Dumped HTML should contain text 'Hello'");

    free(contents);
    remove(path);
    xmlFreeDoc(doc); /* Frees doc->encoding as well */
}
/* Test runner entry point: executes every htmlDocDump test case under Unity
 * and returns the value produced by UNITY_END() as the process exit status. */
int main(void) {
    UNITY_BEGIN();

    /* Argument-validation cases. */
    RUN_TEST(test_htmlDocDump_null_doc_returns_minus1);
    RUN_TEST(test_htmlDocDump_null_file_returns_minus1);

    /* Encoder-selection failure. */
    RUN_TEST(test_htmlDocDump_invalid_encoding_returns_minus1_and_no_write);

    /* Successful dumps. */
    RUN_TEST(test_htmlDocDump_writes_content_with_default_encoding);
    RUN_TEST(test_htmlDocDump_writes_content_with_utf8_encoding);

    const int status = UNITY_END();
    return status;
}