|
|
|
|
|
|
|
|
|
|
|
|
|
#include <fcntl.h> |
|
#include <math.h> |
|
#include <getopt.h> |
|
#include <sys/time.h> |
|
#include <sys/types.h> |
|
#include <sys/uio.h> |
|
#include <unistd.h> |
|
#include <assert.h> |
|
|
|
#include <cstdarg> |
|
#include <cstdio> |
|
#include <cstdlib> |
|
#include <climits> |
|
#include <fstream> |
|
#include <iomanip> |
|
#include <iostream> |
|
#include <map> |
|
#include <memory> |
|
#include <sstream> |
|
#include <string> |
|
#include <unordered_set> |
|
#include <vector> |
|
#include <numeric> |
|
#include <algorithm> |
|
|
|
#include "tensorflow/lite/builtin_op_data.h" |
|
#include "tensorflow/lite/interpreter.h" |
|
#include "tensorflow/lite/kernels/register.h" |
|
#include "tensorflow/lite/string_util.h" |
|
|
|
#include "deeplabSegment.h" |
|
#define STB_IMAGE_IMPLEMENTATION |
|
#include "stb_image.h" |
|
#define STB_IMAGE_RESIZE_IMPLEMENTATION |
|
#include "stb_image_resize.h" |
|
#define STB_IMAGE_WRITE_IMPLEMENTATION |
|
#include "stb_image_write.h" |
|
|
|
#define LOG(x) std::cout |
|
|
|
namespace deeplabSegment { |
|
|
|
/**
 * Convert a timeval into a single microsecond count.
 *
 * The arithmetic is carried out in double precision so that the
 * seconds-to-microseconds multiplication cannot overflow on platforms
 * where tv_sec is a 32-bit integer.
 *
 * @param t  time value with seconds (tv_sec) and microseconds (tv_usec)
 * @return   t expressed in microseconds
 */
double get_us(struct timeval t)
{
    return static_cast<double>(t.tv_sec) * 1000000.0 + static_cast<double>(t.tv_usec);
}
|
|
|
|
|
|
|
void deeplab_postprocess(const TfLiteTensor* mask_tensor, uint8_t* mask_array, std::vector<uint8_t> &class_indexes) |
|
{ |
|
|
|
|
|
|
|
|
|
|
|
const float* data = reinterpret_cast<float*>(mask_tensor->data.raw); |
|
|
|
TfLiteIntArray* output_dims = mask_tensor->dims; |
|
int batch = output_dims->data[0]; |
|
int height = output_dims->data[1]; |
|
int width = output_dims->data[2]; |
|
int channel = output_dims->data[3]; |
|
auto unit = sizeof(float); |
|
|
|
|
|
auto bytesPerRow = channel * unit; |
|
auto bytesPerImage = width * bytesPerRow; |
|
auto bytesPerBatch = height * bytesPerImage; |
|
|
|
|
|
assert(mask_array != nullptr); |
|
bzero((void*)mask_array, height * width * 1 * sizeof(uint8_t)); |
|
|
|
for (int b = 0; b < batch; b++) { |
|
auto bytes = data + b * bytesPerBatch / unit; |
|
LOG(INFO) << "batch " << b << "\n"; |
|
|
|
for (int h = 0; h < height; h++) { |
|
for (int w = 0; w < width; w++) { |
|
|
|
int class_scores_offset, class_scores_step; |
|
|
|
class_scores_offset = h * width * channel + w * channel; |
|
class_scores_step = 1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
uint8_t class_index = 0; |
|
float max_score = 0.0; |
|
for (int i = 0; i < channel; i++) { |
|
if (bytes[class_scores_offset + i * class_scores_step] > max_score) { |
|
class_index = i; |
|
max_score = bytes[class_scores_offset + i * class_scores_step]; |
|
} |
|
} |
|
int mask_offset = h * width + w; |
|
mask_array[mask_offset] = class_index; |
|
|
|
if(class_index != 0 && std::count(class_indexes.begin(), class_indexes.end(), class_index) == 0) { |
|
class_indexes.emplace_back(class_index); |
|
} |
|
} |
|
} |
|
} |
|
return; |
|
} |
|
|
|
|
|
|
|
uint8_t* image_resize(uint8_t* inputImage, int image_width, int image_height, int image_channel, int input_width, int input_height, int input_channel) |
|
{ |
|
|
|
assert(image_channel == input_channel); |
|
|
|
uint8_t* input_image = (uint8_t*)malloc(input_height * input_width * input_channel * sizeof(uint8_t)); |
|
if (input_image == nullptr) { |
|
LOG(ERROR) << "Can't alloc memory\n"; |
|
exit(-1); |
|
} |
|
stbir_resize_uint8(inputImage, image_width, image_height, 0, |
|
input_image, input_width, input_height, 0, image_channel); |
|
|
|
return input_image; |
|
} |
|
|
|
|
|
template <class T> |
|
void fill_data(T* out, uint8_t* in, int input_width, int input_height, |
|
int input_channels, Settings* s) { |
|
auto output_number_of_pixels = input_height * input_width * input_channels; |
|
|
|
for (int i = 0; i < output_number_of_pixels; i++) { |
|
if (s->input_floating) |
|
out[i] = (in[i] - s->input_mean) / s->input_std; |
|
else |
|
out[i] = (uint8_t)in[i]; |
|
} |
|
|
|
return; |
|
} |
|
|
|
|
|
/**
 * Run the whole segmentation pipeline described by the settings.
 *
 * Steps: load the TFLite model, build the interpreter, read the class
 * labels, load and resize the input image, fill the input tensor, invoke the
 * interpreter (with optional warm-up runs), turn the output scores into a
 * label mask, optionally resize the mask back to the original image size,
 * print the classes that were found and write the mask as a PNG.
 *
 * Any unrecoverable error is reported on the log stream and terminates the
 * process with exit(-1).
 *
 * @param s  run settings; s->input_floating is set to true when the model
 *           takes float32 input
 */
void RunInference(Settings* s) {
    // c_str() never returns null, so test the string itself.
    if (s->model_name.empty()) {
        LOG(ERROR) << "no model file name\n";
        exit(-1);
    }

    // Load the model. The interpreter is declared after the model so that it
    // is destroyed first.
    std::unique_ptr<tflite::FlatBufferModel> model;
    std::unique_ptr<tflite::Interpreter> interpreter;
    model = tflite::FlatBufferModel::BuildFromFile(s->model_name.c_str());
    if (!model) {
        LOG(FATAL) << "\nFailed to mmap model " << s->model_name << "\n";
        exit(-1);
    }

    LOG(INFO) << "Loaded model " << s->model_name << "\n";
    model->error_reporter();
    LOG(INFO) << "resolved reporter\n";

    // Build the interpreter.
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*model, resolver)(&interpreter);
    if (!interpreter) {
        LOG(FATAL) << "Failed to construct interpreter\n";
        exit(-1);
    }

    interpreter->SetAllowFp16PrecisionForFp32(s->allow_fp16);
    if (s->number_of_threads != -1) {
        interpreter->SetNumThreads(s->number_of_threads);
    }

    // LOG() is a plain stream here and does not abort, so stop explicitly.
    if (interpreter->AllocateTensors() != kTfLiteOk) {
        LOG(FATAL) << "Failed to allocate tensors!\n";
        exit(-1);
    }

    // Class labels: index 0 is always "background", followed by one label
    // per line of the classes file.
    std::vector<std::string> classes;
    classes.emplace_back("background");
    std::ifstream classesOs(s->classes_file_name.c_str());
    std::string line;
    while (std::getline(classesOs, line)) {
        classes.emplace_back(line);
    }
    const int num_classes = static_cast<int>(classes.size());
    LOG(INFO) << "num_classes: " << num_classes << "\n";

    // Input tensor geometry, read as [batch, height, width, channels].
    const std::vector<int>& inputs = interpreter->inputs();
    assert(inputs.size() == 1);

    const int input = inputs[0];
    const TfLiteIntArray* dims = interpreter->tensor(input)->dims;
    const int input_batch = dims->data[0];
    const int input_height = dims->data[1];
    const int input_width = dims->data[2];
    const int input_channels = dims->data[3];

    if (s->verbose) LOG(INFO) << "input tensor info: "
                              << "type " << interpreter->tensor(input)->type << ", "
                              << "batch " << input_batch << ", "
                              << "height " << input_height << ", "
                              << "width " << input_width << ", "
                              << "channels " << input_channels << "\n";

    // Load the image, forcing 3 channels. stbi_load reports the channel count
    // of the *file* through its out-parameter, while the returned pixels
    // always have the requested number of channels.
    const int loaded_channels = 3;
    int image_width = 0;
    int image_height = 0;
    int channels_in_file = 0;

    uint8_t* input_image = reinterpret_cast<uint8_t*>(
        stbi_load(s->input_img_name.c_str(), &image_width, &image_height, &channels_in_file, loaded_channels));
    if (input_image == nullptr) {
        LOG(FATAL) << "Can't open" << s->input_img_name << "\n";
        exit(-1);
    }

    LOG(INFO) << "origin image size: width:" << image_width
              << ", height:" << image_height
              << ", channel:" << channels_in_file
              << "\n";

    if (input_channels != loaded_channels) {
        LOG(FATAL) << "model expects " << input_channels << " input channels, image was loaded with "
                   << loaded_channels << "\n";
        stbi_image_free(input_image);
        exit(-1);
    }

    // Resize to the model input size; the buffer is released automatically.
    std::unique_ptr<uint8_t, decltype(&free)> resized_image(
        image_resize(input_image, image_width, image_height, loaded_channels,
                     input_width, input_height, input_channels),
        &free);

    stbi_image_free(input_image);
    input_image = nullptr;

    // Fill the input tensor according to its element type.
    switch (interpreter->tensor(input)->type) {
        case kTfLiteFloat32:
            s->input_floating = true;
            fill_data<float>(interpreter->typed_tensor<float>(input), resized_image.get(),
                             input_width, input_height, input_channels, s);
            break;
        case kTfLiteUInt8:
            fill_data<uint8_t>(interpreter->typed_tensor<uint8_t>(input), resized_image.get(),
                               input_width, input_height, input_channels, s);
            break;
        default:
            LOG(FATAL) << "cannot handle input type "
                       << interpreter->tensor(input)->type << " yet";
            exit(-1);
    }

    // Warm-up runs are only performed when more than one timed run is requested.
    if (s->loop_count > 1) {
        for (int i = 0; i < s->number_of_warmup_runs; i++) {
            if (interpreter->Invoke() != kTfLiteOk) {
                LOG(FATAL) << "Failed to invoke tflite!\n";
                exit(-1);
            }
        }
    }

    // Timed runs.
    struct timeval start_time, stop_time;
    gettimeofday(&start_time, nullptr);
    for (int i = 0; i < s->loop_count; i++) {
        if (interpreter->Invoke() != kTfLiteOk) {
            LOG(FATAL) << "Failed to invoke tflite!\n";
            exit(-1);
        }
    }
    gettimeofday(&stop_time, nullptr);
    LOG(INFO) << "invoked average time:" << (get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000) << " ms \n";

    // Output tensor: a single float score tensor is expected.
    const std::vector<int>& outputs = interpreter->outputs();
    assert(outputs.size() == 1);

    const int output = outputs[0];
    TfLiteTensor* mask_output = interpreter->tensor(output);

    // deeplab_postprocess reads the data as float, so reject anything else at
    // run time instead of relying on a debug-only assert.
    if (mask_output->type != kTfLiteFloat32) {
        LOG(FATAL) << "cannot handle output type " << mask_output->type << " yet\n";
        exit(-1);
    }

    const TfLiteIntArray* output_dims = mask_output->dims;
    const int mask_batch = output_dims->data[0];
    const int mask_height = output_dims->data[1];
    const int mask_width = output_dims->data[2];
    const int mask_channels = output_dims->data[3];

    if (s->verbose) LOG(INFO) << "output tensor info: "
                              << "name " << mask_output->name << ", "
                              << "type " << mask_output->type << ", "
                              << "batch " << mask_batch << ", "
                              << "height " << mask_height << ", "
                              << "width " << mask_width << ", "
                              << "channels " << mask_channels << "\n";

    // The label file (plus background) should describe every output channel.
    assert(num_classes == mask_channels);

    // One byte per pixel for the label mask.
    const size_t mask_bytes = static_cast<size_t>(mask_height) * static_cast<size_t>(mask_width) * sizeof(uint8_t);
    std::unique_ptr<uint8_t, decltype(&free)> mask_array(static_cast<uint8_t*>(malloc(mask_bytes)), &free);
    if (!mask_array) {
        LOG(ERROR) << "Can't alloc memory\n";
        exit(-1);
    }
    std::vector<uint8_t> class_indexes;

    gettimeofday(&start_time, nullptr);
    deeplab_postprocess(mask_output, mask_array.get(), class_indexes);
    gettimeofday(&stop_time, nullptr);
    LOG(INFO) << "deeplab_postprocess time: " << (get_us(stop_time) - get_us(start_time)) / 1000 << " ms\n";

    int save_width, save_height;
    if (s->keep_shape) {
        // Scale the mask back to the original image size. reset() frees the
        // model-sized mask after the resized copy has been produced.
        // NOTE(review): image_resize goes through stbir_resize_uint8, which may
        // blend neighbouring class indexes at region borders - confirm whether
        // a nearest-neighbour resize is wanted here.
        mask_array.reset(image_resize(mask_array.get(), mask_width, mask_height, 1, image_width, image_height, 1));
        save_width = image_width;
        save_height = image_height;
    } else {
        save_width = mask_width;
        save_height = mask_height;
    }

    // Report the classes found in the mask.
    LOG(INFO) << "Segment class:\n";
    for (auto class_index : class_indexes) {
        if (class_index < classes.size()) {
            LOG(INFO) << classes[class_index] << "\n";
        } else {
            // The label file is shorter than the model's channel count.
            LOG(INFO) << "unknown class " << static_cast<int>(class_index) << "\n";
        }
    }

    // Save the mask; stbi_write_png returns 0 on failure.
    if (stbi_write_png(s->mask_img_name.c_str(), save_width, save_height, 1, mask_array.get(), 0) == 0) {
        LOG(ERROR) << "Failed to write mask image " << s->mask_img_name << "\n";
        exit(-1);
    }
    LOG(INFO) << "Segmentation result has been saved to: " << s->mask_img_name << "\n";

    return;
}
|
|
|
/**
 * Print the command-line help text (one line per supported option) to the
 * log stream.
 */
void display_usage() {
    // Adjacent string literals are concatenated by the compiler into one message.
    static const char* const kUsageText =
        "Usage: deeplabSegment\n"
        "--tflite_model, -m: model_name.tflite\n"
        "--image, -i: image_name.jpg\n"
        "--classes, -l: classes labels for the model\n"
        "--input_mean, -b: input mean\n"
        "--input_std, -s: input standard deviation\n"
        "--allow_fp16, -f: [0|1], allow running fp32 models with fp16 or not\n"
        "--threads, -t: number of threads\n"
        "--count, -c: loop interpreter->Invoke() for certain times\n"
        "--warmup_runs, -w: number of warmup runs\n"
        "--mask, -k: mask png file to save segment output\n"
        "--keep_shape, -p: [0|1] keep predict mask as the same shape of input image\n"
        "--verbose, -v: [0|1] print more information\n"
        "\n";

    LOG(INFO) << kUsageText;
}
|
|
|
|
|
int Main(int argc, char** argv) { |
|
Settings s; |
|
|
|
int c; |
|
while (1) { |
|
static struct option long_options[] = { |
|
{"tflite_model", required_argument, nullptr, 'm'}, |
|
{"image", required_argument, nullptr, 'i'}, |
|
{"classes", required_argument, nullptr, 'l'}, |
|
{"input_mean", required_argument, nullptr, 'b'}, |
|
{"input_std", required_argument, nullptr, 's'}, |
|
{"threads", required_argument, nullptr, 't'}, |
|
{"allow_fp16", required_argument, nullptr, 'f'}, |
|
{"count", required_argument, nullptr, 'c'}, |
|
{"warmup_runs", required_argument, nullptr, 'w'}, |
|
{"mask", required_argument, nullptr, 'k'}, |
|
{"keep_shape", required_argument, nullptr, 'p'}, |
|
{"verbose", required_argument, nullptr, 'v'}, |
|
{"help", no_argument, nullptr, 'h'}, |
|
{nullptr, 0, nullptr, 0}}; |
|
|
|
|
|
int option_index = 0; |
|
|
|
c = getopt_long(argc, argv, |
|
"b:c:f:i:hk:l:m:p:s:t:v:w:", long_options, |
|
&option_index); |
|
|
|
|
|
if (c == -1) break; |
|
|
|
switch (c) { |
|
case 'b': |
|
s.input_mean = strtod(optarg, nullptr); |
|
break; |
|
case 'c': |
|
s.loop_count = |
|
strtol(optarg, nullptr, 10); |
|
break; |
|
case 'f': |
|
s.allow_fp16 = |
|
strtol(optarg, nullptr, 10); |
|
break; |
|
case 'i': |
|
s.input_img_name = optarg; |
|
break; |
|
case 'l': |
|
s.classes_file_name = optarg; |
|
break; |
|
case 'm': |
|
s.model_name = optarg; |
|
break; |
|
case 's': |
|
s.input_std = strtod(optarg, nullptr); |
|
break; |
|
case 't': |
|
s.number_of_threads = strtol( |
|
optarg, nullptr, 10); |
|
break; |
|
case 'v': |
|
s.verbose = |
|
strtol(optarg, nullptr, 10); |
|
break; |
|
case 'w': |
|
s.number_of_warmup_runs = |
|
strtol(optarg, nullptr, 10); |
|
break; |
|
case 'p': |
|
s.keep_shape = |
|
strtol(optarg, nullptr, 10); |
|
break; |
|
case 'k': |
|
s.mask_img_name = optarg; |
|
break; |
|
case 'h': |
|
case '?': |
|
default: |
|
|
|
display_usage(); |
|
exit(-1); |
|
exit(-1); |
|
} |
|
} |
|
RunInference(&s); |
|
return 0; |
|
} |
|
|
|
} |
|
|
|
int main(int argc, char** argv) { |
|
return deeplabSegment::Main(argc, argv); |
|
} |
|
|