g30rv17ys's picture
Add files using upload-large-folder tool
fd4b932 verified
//
// deeplabSegment.cpp
// Tensorflow-lite
//
// Created by david8862 on 2020/08/26.
//
#include <fcntl.h>
#include <math.h>
#include <getopt.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
#include <assert.h>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <climits>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>
#include <numeric>
#include <algorithm>
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/string_util.h"
#include "deeplabSegment.h"
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include "stb_image_resize.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#define LOG(x) std::cout
namespace deeplabSegment {
// Convert a timeval into a microsecond count, returned as a double.
//
// Both fields are widened to double before scaling: multiplying tv_sec by
// 1000000 in integer arithmetic overflows on platforms where time_t/long is
// 32 bits wide. Realistic values stay far below 2^53, so the result is exact.
double get_us(struct timeval t)
{
    return static_cast<double>(t.tv_sec) * 1000000.0 + static_cast<double>(t.tv_usec);
}
// DeepLab postprocess for prediction mask tensor
void deeplab_postprocess(const TfLiteTensor* mask_tensor, uint8_t* mask_array, std::vector<uint8_t> &class_indexes)
{
// 1. do following transform to get the output segmentation
// mask array:
//
// mask = np.argmax(prediction, -1)
//
const float* data = reinterpret_cast<float*>(mask_tensor->data.raw);
TfLiteIntArray* output_dims = mask_tensor->dims;
int batch = output_dims->data[0];
int height = output_dims->data[1];
int width = output_dims->data[2];
int channel = output_dims->data[3];
auto unit = sizeof(float);
// TF/TFLite tensor format: NHWC
auto bytesPerRow = channel * unit;
auto bytesPerImage = width * bytesPerRow;
auto bytesPerBatch = height * bytesPerImage;
// Check and clear output mask array
assert(mask_array != nullptr);
bzero((void*)mask_array, height * width * 1 * sizeof(uint8_t));
for (int b = 0; b < batch; b++) {
auto bytes = data + b * bytesPerBatch / unit;
LOG(INFO) << "batch " << b << "\n";
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
//get bbox prediction data offset for each anchor, each feature point
int class_scores_offset, class_scores_step;
// Tensorflow format tensor, NHWC
class_scores_offset = h * width * channel + w * channel;
class_scores_step = 1;
// Get class index with max score (index 0 should be background),
// just as Python postprocess:
//
// mask = np.argmax(prediction, -1)
//
uint8_t class_index = 0;
float max_score = 0.0;
for (int i = 0; i < channel; i++) {
if (bytes[class_scores_offset + i * class_scores_step] > max_score) {
class_index = i;
max_score = bytes[class_scores_offset + i * class_scores_step];
}
}
int mask_offset = h * width + w;
mask_array[mask_offset] = class_index;
if(class_index != 0 && std::count(class_indexes.begin(), class_indexes.end(), class_index) == 0) {
class_indexes.emplace_back(class_index);
}
}
}
}
return;
}
//Resize image to model input shape
uint8_t* image_resize(uint8_t* inputImage, int image_width, int image_height, int image_channel, int input_width, int input_height, int input_channel)
{
// assume the data channel match
assert(image_channel == input_channel);
uint8_t* input_image = (uint8_t*)malloc(input_height * input_width * input_channel * sizeof(uint8_t));
if (input_image == nullptr) {
LOG(ERROR) << "Can't alloc memory\n";
exit(-1);
}
stbir_resize_uint8(inputImage, image_width, image_height, 0,
input_image, input_width, input_height, 0, image_channel);
return input_image;
}
// Copy an 8-bit image buffer into a model input buffer of element type T.
//
// Exactly input_height * input_width * input_channels elements are written.
// When s->input_floating is set, each value is normalized as
// (value - s->input_mean) / s->input_std; otherwise the raw 8-bit values are
// copied through unchanged.
template <class T>
void fill_data(T* out, uint8_t* in, int input_width, int input_height,
               int input_channels, Settings* s) {
    const auto element_count = input_height * input_width * input_channels;

    if (s->input_floating) {
        // Normalizing path
        for (int idx = 0; idx < element_count; ++idx) {
            out[idx] = (in[idx] - s->input_mean) / s->input_std;
        }
        return;
    }

    // Pass-through path
    for (int idx = 0; idx < element_count; ++idx) {
        out[idx] = in[idx];
    }
}
// Run one end-to-end segmentation pass as configured by `s`:
//   1. load the TFLite model and build an interpreter,
//   2. read the class labels (a "background" label is always placed first),
//   3. load the input image, resize it to the model input shape and copy it
//      into the input tensor,
//   4. invoke the model (optional warm-up runs, then s->loop_count timed runs),
//   5. turn the prediction tensor into a class-index mask and save it as a
//      single-channel PNG, optionally resized back to the source image shape.
//
// Side effect: s->input_floating is set according to the input tensor type.
// Any unrecoverable failure is logged and terminates the process with exit(-1).
void RunInference(Settings* s) {
    // c_str() never returns null, so an unset name has to be detected with empty()
    if (s->model_name.empty()) {
        LOG(ERROR) << "no model file name\n";
        exit(-1);
    }

    // load model
    std::unique_ptr<tflite::FlatBufferModel> model =
        tflite::FlatBufferModel::BuildFromFile(s->model_name.c_str());
    if (!model) {
        LOG(FATAL) << "\nFailed to mmap model " << s->model_name << "\n";
        exit(-1);
    }
    LOG(INFO) << "Loaded model " << s->model_name << "\n";
    model->error_reporter();
    LOG(INFO) << "resolved reporter\n";

    // prepare model interpreter
    tflite::ops::builtin::BuiltinOpResolver resolver;
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::InterpreterBuilder(*model, resolver)(&interpreter);
    if (!interpreter) {
        LOG(FATAL) << "Failed to construct interpreter\n";
        exit(-1);
    }

    interpreter->SetAllowFp16PrecisionForFp32(s->allow_fp16);
    if (s->number_of_threads != -1) {
        interpreter->SetNumThreads(s->number_of_threads);
    }

    // LOG(FATAL) is only a stream here, so stop explicitly: the tensors read
    // below are not usable after a failed allocation.
    if (interpreter->AllocateTensors() != kTfLiteOk) {
        LOG(FATAL) << "Failed to allocate tensors!\n";
        exit(-1);
    }

    // get classes labels and add background label
    std::vector<std::string> classes;
    classes.emplace_back("background");
    std::ifstream classesOs(s->classes_file_name.c_str());
    std::string line;
    while (std::getline(classesOs, line)) {
        classes.emplace_back(line);
    }
    const int num_classes = static_cast<int>(classes.size());
    LOG(INFO) << "num_classes: " << num_classes << "\n";

    // assuming one input only
    const std::vector<int> inputs = interpreter->inputs();
    assert(inputs.size() == 1);

    // get input dimension from the input tensor metadata
    const int input = interpreter->inputs()[0];
    TfLiteIntArray* dims = interpreter->tensor(input)->dims;
    const int input_batch = dims->data[0];
    const int input_height = dims->data[1];
    const int input_width = dims->data[2];
    const int input_channels = dims->data[3];

    if (s->verbose) LOG(INFO) << "input tensor info: "
                              << "type " << interpreter->tensor(input)->type << ", "
                              << "batch " << input_batch << ", "
                              << "height " << input_height << ", "
                              << "width " << input_width << ", "
                              << "channels " << input_channels << "\n";

    // read input image
    // The decoder is asked for exactly this many channels, so the returned
    // buffer always has this layout; image_channel only reports what the file
    // itself contains.
    const int loaded_channels = 3;
    int image_width, image_height, image_channel;
    auto input_image = (uint8_t*)stbi_load(s->input_img_name.c_str(), &image_width, &image_height, &image_channel, loaded_channels);
    if (input_image == nullptr) {
        LOG(FATAL) << "Can't open " << s->input_img_name << "\n";
        exit(-1);
    }
    LOG(INFO) << "origin image size: width:" << image_width
              << ", height:" << image_height
              << ", channel:" << image_channel
              << "\n";

    // resize input image
    // Pass the channel count of the decoded buffer, not of the file: the two
    // differ for grayscale or RGBA files.
    uint8_t* resizeImage = image_resize(input_image, image_width, image_height, loaded_channels, input_width, input_height, input_channels);

    // free input image
    stbi_image_free(input_image);
    input_image = nullptr;

    // fulfill image data to model input tensor
    switch (interpreter->tensor(input)->type) {
        case kTfLiteFloat32:
            s->input_floating = true;
            fill_data<float>(interpreter->typed_tensor<float>(input), resizeImage,
                             input_width, input_height, input_channels, s);
            break;
        case kTfLiteUInt8:
            // make sure raw bytes are copied, whatever the flag held before
            s->input_floating = false;
            fill_data<uint8_t>(interpreter->typed_tensor<uint8_t>(input), resizeImage,
                               input_width, input_height, input_channels, s);
            break;
        default:
            LOG(FATAL) << "cannot handle input type "
                       << interpreter->tensor(input)->type << " yet";
            exit(-1);
    }

    // the resized copy now lives in the input tensor
    free(resizeImage);
    resizeImage = nullptr;

    // run warm up session
    if (s->loop_count > 1) {
        for (int i = 0; i < s->number_of_warmup_runs; i++) {
            if (interpreter->Invoke() != kTfLiteOk) {
                LOG(FATAL) << "Failed to invoke tflite!\n";
                exit(-1);
            }
        }
    }

    // run model sessions to get output
    struct timeval start_time, stop_time;
    gettimeofday(&start_time, nullptr);
    for (int i = 0; i < s->loop_count; i++) {
        if (interpreter->Invoke() != kTfLiteOk) {
            LOG(FATAL) << "Failed to invoke tflite!\n";
            exit(-1);
        }
    }
    gettimeofday(&stop_time, nullptr);
    // an average only exists when at least one run was timed
    if (s->loop_count > 0) {
        LOG(INFO) << "invoked average time:" << (get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000.0) << " ms \n";
    }

    // get output tensor info, assume only 1 output tensor (pred_mask/Softmax)
    // image_input: 1 x 512 x 512 x 3
    // "pred_mask/Softmax": 1 x 512 x 512 x num_classes
    const std::vector<int> outputs = interpreter->outputs();
    assert(outputs.size() == 1);

    const int output = interpreter->outputs()[0];
    TfLiteTensor* mask_output = interpreter->tensor(output);

    // Now we only support float32 type output tensor. Checked at runtime,
    // since the postprocess reads the tensor data as float.
    if (mask_output->type != kTfLiteFloat32) {
        LOG(FATAL) << "cannot handle output type " << mask_output->type << " yet\n";
        exit(-1);
    }

    TfLiteIntArray* output_dims = mask_output->dims;
    const int mask_batch = output_dims->data[0];
    const int mask_height = output_dims->data[1];
    const int mask_width = output_dims->data[2];
    const int mask_channels = output_dims->data[3];

    if (s->verbose) LOG(INFO) << "output tensor info: "
                              << "name " << mask_output->name << ", "
                              << "type " << mask_output->type << ", "
                              << "batch " << mask_batch << ", "
                              << "height " << mask_height << ", "
                              << "width " << mask_width << ", "
                              << "channels " << mask_channels << "\n";

    // check if predict mask channel number
    // matches classes definition; the class names are indexed by predicted
    // class below, so a mismatch would read outside the label list
    if (num_classes != mask_channels) {
        LOG(ERROR) << "class number " << num_classes
                   << " doesn't match mask channel number " << mask_channels << "\n";
        exit(-1);
    }

    // Alloc mask array for post process
    uint8_t* mask_array = (uint8_t*)malloc(static_cast<size_t>(mask_height) * static_cast<size_t>(mask_width) * 1 * sizeof(uint8_t));
    if (mask_array == nullptr) {
        LOG(ERROR) << "Can't alloc memory\n";
        exit(-1);
    }

    std::vector<uint8_t> class_indexes;
    // Do deeplab_postprocess to generate mask array
    gettimeofday(&start_time, nullptr);
    deeplab_postprocess(mask_output, mask_array, class_indexes);
    gettimeofday(&stop_time, nullptr);
    LOG(INFO) << "deeplab_postprocess time: " << (get_us(stop_time) - get_us(start_time)) / 1000 << " ms\n";

    int save_width, save_height;
    if (s->keep_shape) {
        // Resize the prediction mask back to original image shape
        // NOTE(review): the mask holds class indexes; if the resizer
        // interpolates between pixels, values along class borders may not be
        // valid labels - confirm whether nearest-neighbour sampling is needed.
        uint8_t* origin_mask_array = image_resize(mask_array, mask_width, mask_height, 1, image_width, image_height, 1);

        // free prediction mask
        free(mask_array);
        mask_array = origin_mask_array;

        save_width = image_width;
        save_height = image_height;
    } else {
        save_width = mask_width;
        save_height = mask_height;
    }

    // Show segment class result
    LOG(INFO) << "Segment class:\n";
    for (auto class_index : class_indexes) {
        LOG(INFO) << classes[class_index] << "\n";
    }

    // Save mask array to png image file
    const int written = stbi_write_png(s->mask_img_name.c_str(), save_width, save_height, 1, mask_array, 0);
    free(mask_array);
    mask_array = nullptr;
    if (written == 0) {
        LOG(ERROR) << "Failed to save segmentation result to: " << s->mask_img_name << "\n";
        exit(-1);
    }
    LOG(INFO) << "Segmentation result has been saved to: " << s->mask_img_name << "\n";

    return;
}
// Print the command line help text, one option per line, to the log stream.
void display_usage() {
    static const char* const kUsageLines[] = {
        "Usage: deeplabSegment\n",
        "--tflite_model, -m: model_name.tflite\n",
        "--image, -i: image_name.jpg\n",
        "--classes, -l: classes labels for the model\n",
        "--input_mean, -b: input mean\n",
        "--input_std, -s: input standard deviation\n",
        "--allow_fp16, -f: [0|1], allow running fp32 models with fp16 or not\n",
        "--threads, -t: number of threads\n",
        "--count, -c: loop interpreter->Invoke() for certain times\n",
        "--warmup_runs, -w: number of warmup runs\n",
        "--mask, -k: mask png file to save segment output\n",
        "--keep_shape, -p: [0|1] keep predict mask as the same shape of input image\n",
        "--verbose, -v: [0|1] print more information\n",
        "\n",
    };

    for (const char* usage_line : kUsageLines) {
        LOG(INFO) << usage_line;
    }
}
// Parse the command line into a Settings object and run the inference.
//
// Every option except --help takes an argument. An explicit --help/-h prints
// the usage text and exits successfully; an unknown or malformed option prints
// the usage text and exits with -1.
//
// Returns 0 once RunInference() has returned.
int Main(int argc, char** argv) {
    Settings s;

    // The option table never changes, so it is built once outside the loop.
    static struct option long_options[] = {
        {"tflite_model", required_argument, nullptr, 'm'},
        {"image", required_argument, nullptr, 'i'},
        {"classes", required_argument, nullptr, 'l'},
        {"input_mean", required_argument, nullptr, 'b'},
        {"input_std", required_argument, nullptr, 's'},
        {"threads", required_argument, nullptr, 't'},
        {"allow_fp16", required_argument, nullptr, 'f'},
        {"count", required_argument, nullptr, 'c'},
        {"warmup_runs", required_argument, nullptr, 'w'},
        {"mask", required_argument, nullptr, 'k'},
        {"keep_shape", required_argument, nullptr, 'p'},
        {"verbose", required_argument, nullptr, 'v'},
        {"help", no_argument, nullptr, 'h'},
        {nullptr, 0, nullptr, 0}};

    while (true) {
        /* getopt_long stores the option index here. */
        int option_index = 0;

        const int c = getopt_long(argc, argv,
                                  "b:c:f:i:hk:l:m:p:s:t:v:w:", long_options,
                                  &option_index);

        /* Detect the end of the options. */
        if (c == -1) break;

        switch (c) {
            case 'b':
                s.input_mean = strtod(optarg, nullptr);
                break;
            case 'c':
                s.loop_count =
                    strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
                break;
            case 'f':
                s.allow_fp16 =
                    strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
                break;
            case 'i':
                s.input_img_name = optarg;
                break;
            case 'l':
                s.classes_file_name = optarg;
                break;
            case 'm':
                s.model_name = optarg;
                break;
            case 's':
                s.input_std = strtod(optarg, nullptr);
                break;
            case 't':
                s.number_of_threads = strtol(  // NOLINT(runtime/deprecated_fn)
                    optarg, nullptr, 10);
                break;
            case 'v':
                s.verbose =
                    strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
                break;
            case 'w':
                s.number_of_warmup_runs =
                    strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
                break;
            case 'p':
                s.keep_shape =
                    strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
                break;
            case 'k':
                s.mask_img_name = optarg;
                break;
            case 'h':
                /* Help was asked for explicitly: not an error. */
                display_usage();
                exit(0);
            case '?':
            default:
                /* getopt_long already printed an error message. */
                display_usage();
                exit(-1);
        }
    }

    RunInference(&s);
    return 0;
}
} // namespace deeplabSegment
// Program entry point: hands the command line to deeplabSegment::Main and
// uses its return value as the process exit status.
int main(int argc, char** argv) {
    const int exit_status = deeplabSegment::Main(argc, argv);
    return exit_status;
}