
Input: FP32 histogram H with 2048 bins: bin[ 0 ], ..., bin[ 2047 ]
For i in range( 128 , 2048 ):
    reference_distribution_P = [ bin[ 0 ] , ..., bin[ i-1 ] ] // take first 'i' bins from H
    outliers_count = sum( bin[ i ] , bin[ i+1 ] , ..., bin[ 2047 ] )
    reference_distribution_P[ i-1 ] += outliers_count
    P /= sum(P) // normalize distribution P
    candidate_distribution_Q = quantize [ bin[ 0 ], ..., bin[ i-1 ] ] into 128 levels // explained later
    expand candidate_distribution_Q to 'i' bins // explained later
    Q /= sum(Q) // normalize distribution Q
    divergence[ i ] = KL_divergence( reference_distribution_P, candidate_distribution_Q )
End For
Find index 'm' for which divergence[ m ] is minimal
threshold = ( m + 0.5 ) * ( width of a bin )



class QuantNet : public ncnn::Net
int get_conv_names();
int get_conv_bottom_blob_names();
int get_conv_weight_blob_scales();
int get_input_names();

std::vector<std::string> conv_names;
std::map<std::string,std::string> conv_bottom_blob_names;
std::map<std::string,std::vector<float> > weight_scales;
std::vector<std::string> input_names;

int QuantNet::get_input_names()
for (size_t i=0; i<layers.size(); i++)
ncnn::Layer* layer = layers[i];
if (layer->type == "Input")
for (size_t j=0; j<layer->tops.size(); j++)
int blob_index = layer->tops[j];
std::string name = blobs[blob_index].name.c_str();

return 0;

int QuantNet::get_conv_names()
for (size_t i=0; i<layers.size(); i++)
ncnn::Layer* layer = layers[i];

if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
std::string name = layer->name;

return 0;

int QuantNet::get_conv_bottom_blob_names()
// find conv bottom name or index
for (size_t i=0; i<layers.size(); i++)
ncnn::Layer* layer = layers[i];

if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
std::string name = layer->name;
std::string bottom_blob_name = blobs[layer->bottoms[0]].name;
conv_bottom_blob_names[name] = bottom_blob_name;

return 0;

int QuantNet::get_conv_weight_blob_scales()
for (size_t i=0; i<layers.size(); i++)
ncnn::Layer* layer = layers[i];

if (layer->type == "Convolution")
std::string name = layer->name;
const int weight_data_size_output = ((ncnn::Convolution*)layer)->weight_data_size / ((ncnn::Convolution*)layer)->num_output;
std::vector<float> scales;

// int8 winograd F43 needs weight data to use 6bit quantization
bool quant_6bit = false;
int kernel_w = ((ncnn::Convolution*)layer)->kernel_w;
int kernel_h = ((ncnn::Convolution*)layer)->kernel_h;
int dilation_w = ((ncnn::Convolution*)layer)->dilation_w;
int dilation_h = ((ncnn::Convolution*)layer)->dilation_h;
int stride_w = ((ncnn::Convolution*)layer)->stride_w;
int stride_h = ((ncnn::Convolution*)layer)->stride_h;

if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
quant_6bit = true;

for (int n=0; n<((ncnn::Convolution*)layer)->num_output; n++)
const ncnn::Mat weight_data_n = ((ncnn::Convolution*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
const float *data_n = weight_data_n;
float max_value = std::numeric_limits<float>::min();

for (int i = 0; i < weight_data_size_output; i++)
max_value = std::max(max_value, std::fabs(data_n[i]));

if (quant_6bit)
scales.push_back(31 / max_value);
scales.push_back(127 / max_value);

weight_scales[name] = scales;

if (layer->type == "ConvolutionDepthWise")
std::string name = layer->name;
const int weight_data_size_output = ((ncnn::ConvolutionDepthWise*)layer)->weight_data_size / ((ncnn::ConvolutionDepthWise*)layer)->group;
std::vector<float> scales;

for (int n=0; n<((ncnn::ConvolutionDepthWise*)layer)->group; n++)
const ncnn::Mat weight_data_n = ((ncnn::ConvolutionDepthWise*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
const float *data_n = weight_data_n;
float max_value = std::numeric_limits<float>::min();

for (int i = 0; i < weight_data_size_output; i++)
max_value = std::max(max_value, std::fabs(data_n[i]));

scales.push_back(127 / max_value);

weight_scales[name] = scales;

if (layer->type == "InnerProduct")
std::string name = layer->name;
const int weight_data_size_output = ((ncnn::InnerProduct*)layer)->weight_data_size / ((ncnn::InnerProduct*)layer)->num_output;
std::vector<float> scales;

for (int n=0; n<((ncnn::InnerProduct*)layer)->num_output; n++)
const ncnn::Mat weight_data_n = ((ncnn::InnerProduct*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
const float *data_n = weight_data_n;
float max_value = std::numeric_limits<float>::min();

for (int i = 0; i < weight_data_size_output; i++)
max_value = std::max(max_value, std::fabs(data_n[i]));

scales.push_back(127 / max_value);

weight_scales[name] = scales;

return 0;


class QuantizeData
QuantizeData(std::string layer_name, int num);

int initial_blob_max(ncnn::Mat data);
int initial_histogram_interval();
int initial_histogram_value();

int normalize_histogram();
int update_histogram(ncnn::Mat data);

float compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b);
int threshold_distribution(const std::vector<float> &distribution, const int target_bin=128);
float get_data_blob_scale();

std::string name;

float max_value;
int num_bins;
float histogram_interval;
std::vector<float> histogram;

float threshold;
int threshold_bin;
float scale;

// Creates the calibration record for `layer_name` with a histogram of `num`
// bins. The histogram is sized here and seeded through
// initial_histogram_value(); every scalar member starts at zero.
QuantizeData::QuantizeData(std::string layer_name, int num)
{
    name = layer_name;
    max_value = 0.0;
    num_bins = num;
    histogram_interval = 0.0;

    // Nothing else in the class sizes the histogram, yet update_histogram()
    // indexes into it, so it has to be allocated up front.
    histogram.resize(num_bins > 0 ? num_bins : 0);
    initial_histogram_value();

    // Defined values until get_data_blob_scale() computes the real ones.
    threshold = 0.0;
    threshold_bin = 0;
    scale = 0.0;
}

// Raises max_value to the largest absolute value found in `data`, visiting
// data.w * data.h elements in each of the data.c channels. Always returns 0.
int QuantizeData::initial_blob_max(ncnn::Mat data)
{
    const int channel_num = data.c;
    const int size = data.w * data.h;

    for (int q=0; q<channel_num; q++)
    {
        const float *data_n = data.channel(q);
        for (int i=0; i<size; i++)
        {
            max_value = std::max(max_value, std::fabs(data_n[i]));
        }
    }

    return 0;
}

int QuantizeData::initial_histogram_interval()
histogram_interval = max_value / num_bins;

return 0;

int QuantizeData::initial_histogram_value()
for (size_t i=0; i<histogram.size(); i++)
histogram[i] = 0.00001;

return 0;

int QuantizeData::normalize_histogram()
const int length = histogram.size();
float sum = 0;

for (int i=0; i<length; i++)
sum += histogram[i];

for (int i=0; i<length; i++)
histogram[i] /= sum;

return 0;

// Adds the absolute values of `data` (data.w * data.h elements in each of the
// data.c channels) to the histogram. Exact zeros are not counted; values at or
// beyond the last bin are clamped into it.
// Returns 0 on success, or -1 without touching the histogram when it is empty
// or histogram_interval is not positive (initial_histogram_interval() has not
// produced a usable bin width).
int QuantizeData::update_histogram(ncnn::Mat data)
{
    if (histogram.empty() || !(histogram_interval > 0))
        return -1;

    const int channel_num = data.c;
    const int size = data.w * data.h;
    // Clamp to the real histogram size instead of a hard-coded 2047.
    const int last_bin = static_cast<int>(histogram.size()) - 1;

    for (int q=0; q<channel_num; q++)
    {
        const float *data_n = data.channel(q);
        for (int i=0; i<size; i++)
        {
            if (data_n[i] == 0)
                continue;

            const int index = std::min(static_cast<int>(std::fabs(data_n[i]) / histogram_interval), last_bin);

            histogram[index]++;
        }
    }

    return 0;
}

// Returns sum_i a[i] * log(a[i] / b[i]) over two equally sized distributions.
// Bins where a[i] == 0 contribute nothing. Bins where a[i] != 0 but b[i] == 0
// contribute a fixed penalty of 1 instead of the (infinite) log term.
float QuantizeData::compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b)
{
    const size_t length = dist_a.size();
    assert(dist_b.size() == length);
    float result = 0;

    for (size_t i=0; i<length; i++)
    {
        if (dist_a[i] == 0)
            continue;

        if (dist_b[i] == 0)
        {
            result += 1;
        }
        else
        {
            result += dist_a[i] * log(dist_a[i] / dist_b[i]);
        }
    }

    return result;
}

// Searches for the histogram cut-off that loses the least information when the
// kept bins are quantized to `target_bin` levels.
//
// For every candidate cut in [target_bin, distribution.size()):
//   1. reference  = first `cut` bins, with all clipped mass added to the last one
//   2. quantized  = first `cut` bins merged into `target_bin` levels, splitting
//                   boundary bins proportionally
//   3. expanded   = quantized levels spread back over `cut` bins, touching only
//                   bins that are non-zero in `distribution`
//   4. score      = compute_kl_divergence(reference, expanded)
//
// Returns the cut with the smallest score. Returns `target_bin` when no
// candidate scores below the initial bound of 1000, including when the
// distribution has no more than `target_bin` bins.
int QuantizeData::threshold_distribution(const std::vector<float> &distribution, const int target_bin)
{
    int target_threshold = target_bin;
    float min_kl_divergence = 1000;
    const int length = static_cast<int>(distribution.size());

    std::vector<float> quantize_distribution(target_bin);

    // Mass at or beyond the first candidate cut; maintained incrementally below.
    float threshold_sum = 0;
    for (int cut=target_bin; cut<length; cut++)
    {
        threshold_sum += distribution[cut];
    }

    for (int cut=target_bin; cut<length; cut++)
    {
        // Reference distribution: kept bins plus the clipped tail in the last bin.
        std::vector<float> t_distribution(distribution.begin(), distribution.begin()+cut);

        t_distribution[cut-1] += threshold_sum;
        // The next candidate keeps bin `cut`, so it leaves the tail.
        threshold_sum -= distribution[cut];

        // Merge the kept bins into target_bin levels.
        std::fill(quantize_distribution.begin(), quantize_distribution.end(), 0.f);

        const float num_per_bin = static_cast<float>(cut) / target_bin;

        for (int i=0; i<target_bin; i++)
        {
            const float start = i * num_per_bin;
            const float end = start + num_per_bin;

            // Fraction of the source bin straddling the left edge.
            const int left_upper = ceil(start);
            if (left_upper > start)
            {
                const float left_scale = left_upper - start;
                quantize_distribution[i] += left_scale * distribution[left_upper - 1];
            }

            // Fraction of the source bin straddling the right edge.
            const int right_lower = floor(end);
            if (right_lower < end)
            {
                const float right_scale = end - right_lower;
                quantize_distribution[i] += right_scale * distribution[right_lower];
            }

            // Source bins fully inside this level.
            for (int j=left_upper; j<right_lower; j++)
            {
                quantize_distribution[i] += distribution[j];
            }
        }

        // Spread each level back over the non-zero source bins it covers.
        std::vector<float> expand_distribution(cut, 0);

        for (int i=0; i<target_bin; i++)
        {
            const float start = i * num_per_bin;
            const float end = start + num_per_bin;

            // Fractional number of non-zero source bins covered by this level.
            float count = 0;

            const int left_upper = ceil(start);
            float left_scale = 0;
            if (left_upper > start)
            {
                left_scale = left_upper - start;
                if (distribution[left_upper - 1] != 0)
                {
                    count += left_scale;
                }
            }

            const int right_lower = floor(end);
            float right_scale = 0;
            if (right_lower < end)
            {
                right_scale = end - right_lower;
                if (distribution[right_lower] != 0)
                {
                    count += right_scale;
                }
            }

            for (int j=left_upper; j<right_lower; j++)
            {
                if (distribution[j] != 0)
                {
                    count++;
                }
            }

            // When count is 0 every covered bin is zero, so the value below is
            // never written anywhere.
            const float expand_value = quantize_distribution[i] / count;

            if (left_upper > start)
            {
                if (distribution[left_upper - 1] != 0)
                {
                    expand_distribution[left_upper - 1] += expand_value * left_scale;
                }
            }
            if (right_lower < end)
            {
                if (distribution[right_lower] != 0)
                {
                    expand_distribution[right_lower] += expand_value * right_scale;
                }
            }
            for (int j=left_upper; j<right_lower; j++)
            {
                if (distribution[j] != 0)
                {
                    expand_distribution[j] += expand_value;
                }
            }
        }

        // Score this candidate and keep the best one.
        const float kl_divergence = compute_kl_divergence(t_distribution, expand_distribution);

        if (kl_divergence < min_kl_divergence)
        {
            min_kl_divergence = kl_divergence;
            target_threshold = cut;
        }
    }

    return target_threshold;
}

float QuantizeData::get_data_blob_scale()
threshold_bin = threshold_distribution(histogram);
threshold = (threshold_bin + 0.5) * histogram_interval;
scale = 127 / threshold;
return scale;


static int post_training_quantize(const std::vector<std::string> filenames, const char* param_path, const char* bin_path, const char* table_path, struct PreParam per_param)
int size = filenames.size();

QuantNet net;
net.opt = g_default_option;


float mean_vals[3], norm_vals[3];
int weith = per_param.weith;
int height = per_param.height;
bool swapRB = per_param.swapRB;

mean_vals[0] = per_param.mean[0];
mean_vals[1] = per_param.mean[1];
mean_vals[2] = per_param.mean[2];

norm_vals[0] = per_param.norm[0];
norm_vals[1] = per_param.norm[1];
norm_vals[2] = per_param.norm[2];



if (net.input_names.size() <= 0)
fprintf(stderr, "not found [Input] Layer, Check your ncnn.param \n");
return -1;

FILE *fp=fopen(table_path, "w");

// save quantization scale of weight
printf("====> Quantize the parameters.\n");
for (size_t i=0; i<net.conv_names.size(); i++)
std::string layer_name = net.conv_names[i];
std::string blob_name = net.conv_bottom_blob_names[layer_name];
std::vector<float> weight_scale_n = net.weight_scales[layer_name];

fprintf(fp, "%s_param_0 ", layer_name.c_str());
for (size_t j=0; j<weight_scale_n.size(); j++)
fprintf(fp, "%f ", weight_scale_n[j]);
fprintf(fp, "\n");

// initial quantization data
std::vector<QuantizeData> quantize_datas;

for (size_t i=0; i<net.conv_names.size(); i++)
std::string layer_name = net.conv_names[i];

QuantizeData quantize_data(layer_name, 2048);

// step 1 count the max value
printf("====> Quantize the activation.\n");
printf(" ====> step 1 : find the max value.\n");

for (size_t i=0; i<filenames.size(); i++)
std::string img_name = filenames[i];

if ((i+1)%100 == 0)
fprintf(stderr, " %d/%d\n", (int)(i+1), (int)size);

cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
if (bgr.empty())
fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
return -1;

ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, weith, height);
in.substract_mean_normalize(mean_vals, norm_vals);

ncnn::Extractor ex = net.create_extractor();
ex.input(net.input_names[0].c_str(), in);

for (size_t i=0; i<net.conv_names.size(); i++)
std::string layer_name = net.conv_names[i];
std::string blob_name = net.conv_bottom_blob_names[layer_name];

ncnn::Mat out;
ex.extract(blob_name.c_str(), out);

for (size_t j=0; j<quantize_datas.size(); j++)
if (quantize_datas[j].name == layer_name)

// step 2 histogram_interval
printf(" ====> step 2 : generate the histogram_interval.\n");
for (size_t i=0; i<net.conv_names.size(); i++)
std::string layer_name = net.conv_names[i];

for (size_t j=0; j<quantize_datas.size(); j++)
if (quantize_datas[j].name == layer_name)

fprintf(stderr, "%-20s : max = %-15f interval = %-10f\n", quantize_datas[j].name.c_str(), quantize_datas[j].max_value, quantize_datas[j].histogram_interval);

// step 3 histogram
printf(" ====> step 3 : generate the histogram.\n");
for (size_t i=0; i<filenames.size(); i++)
std::string img_name = filenames[i];

if ((i+1)%100 == 0)
fprintf(stderr, " %d/%d\n", (int)(i+1), (int)size);
cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
if (bgr.empty())
fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
return -1;

ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, weith, height);
in.substract_mean_normalize(mean_vals, norm_vals);

ncnn::Extractor ex = net.create_extractor();
ex.input(net.input_names[0].c_str(), in);

for (size_t i=0; i<net.conv_names.size(); i++)
std::string layer_name = net.conv_names[i];
std::string blob_name = net.conv_bottom_blob_names[layer_name];

ncnn::Mat out;
ex.extract(blob_name.c_str(), out);

for (size_t j=0; j<quantize_datas.size(); j++)
if (quantize_datas[j].name == layer_name)

// step4 kld
printf(" ====> step 4 : using kld to find the best threshold value.\n");
for (size_t i=0; i<net.conv_names.size(); i++)
std::string layer_name = net.conv_names[i];
std::string blob_name = net.conv_bottom_blob_names[layer_name];
fprintf(stderr, "%-20s ", layer_name.c_str());

for (size_t j=0; j<quantize_datas.size(); j++)
if (quantize_datas[j].name == layer_name)
fprintf(stderr, "bin : %-8d threshold : %-15f interval : %-10f scale : %-10f\n", \
quantize_datas[j].threshold_bin, \
quantize_datas[j].threshold, \
quantize_datas[j].histogram_interval, \

fprintf(fp, "%s %f\n", layer_name.c_str(), quantize_datas[j].scale);


printf("====> Save the calibration table done.\n");

return 0;


// Image preprocessing settings applied to every calibration image.
struct PreParam
{
    float mean[3];  // per-channel values passed as mean_vals to substract_mean_normalize
    float norm[3];  // per-channel values passed as norm_vals to substract_mean_normalize
    int weith;      // target width for from_pixels_resize (field name kept as spelled; callers use it)
    int height;     // target height for from_pixels_resize
    bool swapRB;    // true: convert BGR to RGB while loading; false: keep BGR
};


// Get the filenames from direct path
// Appends the path of every entry found directly inside `base_path` to
// `file_path`, skipping "." and "..". Entries are not filtered by type and are
// appended in the order readdir() returns them. Each path is
// base_path + entry name, with a '/' inserted when base_path does not already
// end in one.
// Returns 0 on success, or -1 (after perror) when the directory cannot be
// opened; `file_path` is left untouched in that case.
int parse_images_dir(const char *base_path, std::vector<std::string>& file_path)
{
    DIR *dir = opendir(base_path);
    if (dir == NULL)
    {
        // Iterating a NULL DIR* would be undefined behaviour, so stop here.
        perror("Open dir error...");
        return -1;
    }

    std::string prefix = base_path;
    if (!prefix.empty() && prefix[prefix.size() - 1] != '/')
        prefix += '/';

    struct dirent *ptr;
    while ((ptr=readdir(dir)) != NULL)
    {
        // current dir OR parent dir
        if (strcmp(ptr->d_name,".")==0 || strcmp(ptr->d_name,"..")==0)
            continue;

        file_path.push_back(prefix + ptr->d_name);
    }

    // Release the directory handle.
    closedir(dir);

    return 0;
}


# add_subdirectory(quantize)


1. ncnn: https://github.com/Tencent/ncnn
2. NCNN Conv量化详解(一): https://zhuanlan.zhihu.com/p/71881443
3. NCNN量化详解(二): https://zhuanlan.zhihu.com/p/72375164
4. 人工智能学习干货|深度学习模型量化理论+实践: https://www.toutiao.com/i6776432142281867788/