diff --git a/src/activation_layer.c b/src/activation_layer.c
index b8b5d0236c3..4383d7e1b3d 100644
--- a/src/activation_layer.c
+++ b/src/activation_layer.c
@@ -18,8 +18,8 @@ layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
     l.outputs = inputs;
     l.batch=batch;
 
-    l.output = (float*)calloc(batch * inputs, sizeof(float));
-    l.delta = (float*)calloc(batch * inputs, sizeof(float));
+    l.output = (float*)xcalloc(batch * inputs, sizeof(float));
+    l.delta = (float*)xcalloc(batch * inputs, sizeof(float));
 
     l.forward = forward_activation_layer;
     l.backward = backward_activation_layer;
diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c
index 08081bd8041..1306fd38636 100644
--- a/src/avgpool_layer.c
+++ b/src/avgpool_layer.c
@@ -17,8 +17,8 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
     l.outputs = l.out_c;
     l.inputs = h*w*c;
     int output_size = l.outputs * batch;
-    l.output = (float*)calloc(output_size, sizeof(float));
-    l.delta = (float*)calloc(output_size, sizeof(float));
+    l.output = (float*)xcalloc(output_size, sizeof(float));
+    l.delta = (float*)xcalloc(output_size, sizeof(float));
     l.forward = forward_avgpool_layer;
     l.backward = backward_avgpool_layer;
     #ifdef GPU
diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c
index 4f9536a99ce..018294df6e5 100644
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@@ -11,23 +11,23 @@ layer make_batchnorm_layer(int batch, int w, int h, int c)
     layer.h = layer.out_h = h;
     layer.w = layer.out_w = w;
     layer.c = layer.out_c = c;
-    layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
+    layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float));
     layer.inputs = w*h*c;
     layer.outputs = layer.inputs;
 
-    layer.scales = (float*)calloc(c, sizeof(float));
-    layer.scale_updates = (float*)calloc(c, sizeof(float));
+    layer.scales = (float*)xcalloc(c, sizeof(float));
+    layer.scale_updates = (float*)xcalloc(c, sizeof(float));
     int i;
     for(i = 0; i < c; ++i){
         layer.scales[i] = 1;
     }
 
-    layer.mean = (float*)calloc(c, sizeof(float));
-    layer.variance = (float*)calloc(c, sizeof(float));
+    layer.mean = (float*)xcalloc(c, sizeof(float));
+    layer.variance = (float*)xcalloc(c, sizeof(float));
 
-    layer.rolling_mean = (float*)calloc(c, sizeof(float));
-    layer.rolling_variance = (float*)calloc(c, sizeof(float));
+    layer.rolling_mean = (float*)xcalloc(c, sizeof(float));
+    layer.rolling_variance = (float*)xcalloc(c, sizeof(float));
 
     layer.forward = forward_batchnorm_layer;
     layer.backward = backward_batchnorm_layer;
diff --git a/src/blas.c b/src/blas.c
index d00cb89ddf0..96fc067f963 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -34,7 +34,7 @@ void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride,
 
 void flatten(float *x, int size, int layers, int batch, int forward)
 {
-    float* swap = (float*)calloc(size * layers * batch, sizeof(float));
+    float* swap = (float*)xcalloc(size * layers * batch, sizeof(float));
     int i,c,b;
     for(b = 0; b < batch; ++b){
         for(c = 0; c < layers; ++c){
diff --git a/src/box.c b/src/box.c
index 640f54a299e..a4180f29066 100644
--- a/src/box.c
+++ b/src/box.c
@@ -1,4 +1,5 @@
 #include "box.h"
+#include "utils.h"
 #include <stdio.h>
 #include <math.h>
 #include <stdlib.h>
@@ -379,7 +380,7 @@ int nms_comparator(const void *pa, const void *pb)
 void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh)
 {
     int i, j, k;
-    sortable_bbox* s = (sortable_bbox*)calloc(total, sizeof(sortable_bbox));
+    sortable_bbox* s = (sortable_bbox*)xcalloc(total, sizeof(sortable_bbox));
 
     for(i = 0; i < total; ++i){
         s[i].index = i;
diff --git a/src/classifier.c b/src/classifier.c
index fba06a6eae3..d75b320e595 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -17,7 +17,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
 
 float *get_regression_values(char **labels, int n)
 {
-    float* v = (float*)calloc(n, sizeof(float));
+    float* v = (float*)xcalloc(n, sizeof(float));
     int i;
     for(i = 0; i < n; ++i){
         char *p = strchr(labels[i], ' ');
@@ -35,7 +35,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
     printf("%d\n", ngpus);
-    network* nets = (network*)calloc(ngpus, sizeof(network));
+    network* nets = (network*)xcalloc(ngpus, sizeof(network));
 
     srand(time(0));
     int seed = rand();
@@ -415,7 +415,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int* indexes = (int*)calloc(topk, sizeof(int));
+    int* indexes = (int*)xcalloc(topk, sizeof(int));
 
     for(i = 0; i < m; ++i){
         int class_id = -1;
@@ -442,7 +442,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
         images[7] = crop_image(im, 0, 0, w, h);
         images[8] = crop_image(im, -shift, shift, w, h);
         images[9] = crop_image(im, shift, shift, w, h);
-        float* pred = (float*)calloc(classes, sizeof(float));
+        float* pred = (float*)xcalloc(classes, sizeof(float));
         for(j = 0; j < 10; ++j){
             float *p = network_predict(net, images[j].data);
             if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
@@ -489,7 +489,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int* indexes = (int*)calloc(topk, sizeof(int));
+    int* indexes = (int*)xcalloc(topk, sizeof(int));
 
     int size = net.w;
     for(i = 0; i < m; ++i){
@@ -567,7 +567,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int* indexes = (int*)calloc(topk, sizeof(int));
+    int* indexes = (int*)xcalloc(topk, sizeof(int));
 
     for(i = 0; i < m; ++i){
         int class_id = -1;
@@ -638,7 +638,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int* indexes = (int*)calloc(topk, sizeof(int));
+    int* indexes = (int*)xcalloc(topk, sizeof(int));
 
     for(i = 0; i < m; ++i){
         int class_id = -1;
@@ -649,7 +649,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
                 break;
             }
         }
-        float* pred = (float*)calloc(classes, sizeof(float));
+        float* pred = (float*)xcalloc(classes, sizeof(float));
         image im = load_image_color(paths[i], 0, 0);
         for(j = 0; j < nscales; ++j){
             image r = resize_min(im, scales[j]);
@@ -694,7 +694,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
 
     char **names = get_labels(name_list);
     clock_t time;
-    int* indexes = (int*)calloc(top, sizeof(int));
+    int* indexes = (int*)xcalloc(top, sizeof(int));
     char buff[256];
     char *input = buff;
     while(1){
@@ -781,7 +781,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
     int i = 0;
     char **names = get_labels(name_list);
     clock_t time;
-    int* indexes = (int*)calloc(top, sizeof(int));
+    int* indexes = (int*)xcalloc(top, sizeof(int));
     if(!indexes) {
         error("calloc failed");
     }
@@ -971,7 +971,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
     char *name_list = option_find_str(options, "names", 0);
     char **names = get_labels(name_list);
 
-    int* indexes = (int*)calloc(top, sizeof(int));
+    int* indexes = (int*)xcalloc(top, sizeof(int));
 
     if(!cap) error("Couldn't connect to webcam.\n");
     create_window_cv("Threat", 0, 512, 512);
@@ -1110,7 +1110,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
     char *name_list = option_find_str(options, "names", 0);
     char **names = get_labels(name_list);
 
-    int* indexes = (int*)calloc(top, sizeof(int));
+    int* indexes = (int*)xcalloc(top, sizeof(int));
 
     if(!cap) error("Couldn't connect to webcam.\n");
     cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL);
@@ -1193,7 +1193,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
     char *name_list = option_find_str(options, "names", 0);
     char **names = get_labels(name_list);
 
-    int* indexes = (int*)calloc(top, sizeof(int));
+    int* indexes = (int*)xcalloc(top, sizeof(int));
 
     if(!cap) error("Couldn't connect to webcam.\n");
     create_window_cv("Classifier", 0, 512, 512);
@@ -1258,7 +1258,7 @@ void run_classifier(int argc, char **argv)
         for(i = 0; i < len; ++i){
             if (gpu_list[i] == ',') ++ngpus;
         }
-        gpus = (int*)calloc(ngpus, sizeof(int));
+        gpus = (int*)xcalloc(ngpus, sizeof(int));
         for(i = 0; i < ngpus; ++i){
             gpus[i] = atoi(gpu_list);
             gpu_list = strchr(gpu_list, ',')+1;
diff --git a/src/coco.c b/src/coco.c
index 83089d764a6..20febecece1 100644
--- a/src/coco.c
+++ b/src/coco.c
@@ -160,9 +160,9 @@ void validate_coco(char *cfgfile, char *weightfile)
     FILE *fp = fopen(buff, "w");
     fprintf(fp, "[\n");
 
-    box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
-    float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
-    for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
+    box* boxes = (box*)xcalloc(side * side * l.n, sizeof(box));
+    float** probs = (float**)xcalloc(side * side * l.n, sizeof(float*));
+    for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)xcalloc(classes, sizeof(float));
 
     int m = plist->size;
     int i=0;
@@ -173,11 +173,11 @@ void validate_coco(char *cfgfile, char *weightfile)
     float iou_thresh = .5;
 
     int nthreads = 8;
-    image* val = (image*)calloc(nthreads, sizeof(image));
-    image* val_resized = (image*)calloc(nthreads, sizeof(image));
-    image* buf = (image*)calloc(nthreads, sizeof(image));
-    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
-    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
+    image* val = (image*)xcalloc(nthreads, sizeof(image));
+    image* val_resized = (image*)xcalloc(nthreads, sizeof(image));
+    image* buf = (image*)xcalloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)xcalloc(nthreads, sizeof(image));
+    pthread_t* thr = (pthread_t*)xcalloc(nthreads, sizeof(pthread_t));
 
     load_args args = {0};
     args.w = net.w;
@@ -249,29 +249,17 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
 
     int j, k;
     /* unused code,why?
-    FILE** fps = (FILE**)calloc(classes, sizeof(FILE*));
-    if(!fps) {
-        error("calloc failed");
-    }
+    FILE** fps = (FILE**)xcalloc(classes, sizeof(FILE*));
     for(j = 0; j < classes; ++j){
         char buff[1024];
         snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
         fps[j] = fopen(buff, "w");
     }
     */
-    box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
-    if(!boxes) {
-        error("calloc failed");
-    }
-    float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
-    if(!probs) {
-        error("calloc failed");
-    }
+    box* boxes = (box*)xcalloc(side * side * l.n, sizeof(box));
+    float** probs = (float**)xcalloc(side * side * l.n, sizeof(float*));
     for(j = 0; j < side*side*l.n; ++j) {
-      probs[j] = (float*)calloc(classes, sizeof(float));
-      if(!probs[j]) {
-          error("calloc failed");
-      }
+      probs[j] = (float*)xcalloc(classes, sizeof(float));
     }
 
     int m = plist->size;
@@ -349,19 +337,10 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
     char buff[256];
     char *input = buff;
     int j;
-    box* boxes = (box*)calloc(l.side * l.side * l.n, sizeof(box));
-    if(!boxes) {
-        error("calloc failed");
-    }
-    float** probs = (float**)calloc(l.side * l.side * l.n, sizeof(float*));
-    if(!probs) {
-        error("calloc failed");
-    }
+    box* boxes = (box*)xcalloc(l.side * l.side * l.n, sizeof(box));
+    float** probs = (float**)xcalloc(l.side * l.side * l.n, sizeof(float*));
     for(j = 0; j < l.side*l.side*l.n; ++j) {
-      probs[j] = (float*)calloc(l.classes, sizeof(float));
-      if(!probs[j]) {
-          error("calloc failed");
-      }
+      probs[j] = (float*)xcalloc(l.classes, sizeof(float));
     }
     while(1){
         if(filename){
diff --git a/src/compare.c b/src/compare.c
index bb8422611b9..62edabe965c 100644
--- a/src/compare.c
+++ b/src/compare.c
@@ -176,7 +176,7 @@ int bbox_comparator(const void *a, const void *b)
 
     image im1 = load_image_color(box1.filename, net.w, net.h);
     image im2 = load_image_color(box2.filename, net.w, net.h);
-    float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
+    float* X = (float*)xcalloc(net.w * net.h * net.c, sizeof(float));
     memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
     memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
     float *predictions = network_predict(net, X);
@@ -205,7 +205,7 @@ void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, in
 {
     image im1 = load_image_color(a->filename, net.w, net.h);
     image im2 = load_image_color(b->filename, net.w, net.h);
-    float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
+    float* X = (float*)xcalloc(net.w * net.h * net.c, sizeof(float));
     memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
     memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
     float *predictions = network_predict(net, X);
@@ -239,7 +239,7 @@ void SortMaster3000(char *filename, char *weightfile)
     char **paths = (char **)list_to_array(plist);
     int N = plist->size;
     free_list(plist);
-    sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
+    sortable_bbox* boxes = (sortable_bbox*)xcalloc(N, sizeof(sortable_bbox));
     printf("Sorting %d boxes...\n", N);
     for(i = 0; i < N; ++i){
         boxes[i].filename = paths[i];
@@ -274,13 +274,13 @@ void BattleRoyaleWithCheese(char *filename, char *weightfile)
     int N = plist->size;
     int total = N;
     free_list(plist);
-    sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
+    sortable_bbox* boxes = (sortable_bbox*)xcalloc(N, sizeof(sortable_bbox));
     printf("Battling %d boxes...\n", N);
     for(i = 0; i < N; ++i){
         boxes[i].filename = paths[i];
         boxes[i].net = net;
         boxes[i].classes = classes;
-        boxes[i].elos = (float*)calloc(classes, sizeof(float));
+        boxes[i].elos = (float*)xcalloc(classes, sizeof(float));
         for(j = 0; j < classes; ++j){
             boxes[i].elos[j] = 1500;
         }
diff --git a/src/connected_layer.c b/src/connected_layer.c
index 242ab8fb106..1b1218e5f2a 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -74,14 +74,14 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
     l.activation = activation;
     l.learning_rate_scale = 1;
 
-    l.output = (float*)calloc(total_batch * outputs, sizeof(float));
-    l.delta = (float*)calloc(total_batch * outputs, sizeof(float));
+    l.output = (float*)xcalloc(total_batch * outputs, sizeof(float));
+    l.delta = (float*)xcalloc(total_batch * outputs, sizeof(float));
 
-    l.weight_updates = (float*)calloc(inputs * outputs, sizeof(float));
-    l.bias_updates = (float*)calloc(outputs, sizeof(float));
+    l.weight_updates = (float*)xcalloc(inputs * outputs, sizeof(float));
+    l.bias_updates = (float*)xcalloc(outputs, sizeof(float));
 
-    l.weights = (float*)calloc(outputs * inputs, sizeof(float));
-    l.biases = (float*)calloc(outputs, sizeof(float));
+    l.weights = (float*)xcalloc(outputs * inputs, sizeof(float));
+    l.biases = (float*)xcalloc(outputs, sizeof(float));
 
     l.forward = forward_connected_layer;
     l.backward = backward_connected_layer;
@@ -98,22 +98,22 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
     }
 
     if(batch_normalize){
-        l.scales = (float*)calloc(outputs, sizeof(float));
-        l.scale_updates = (float*)calloc(outputs, sizeof(float));
+        l.scales = (float*)xcalloc(outputs, sizeof(float));
+        l.scale_updates = (float*)xcalloc(outputs, sizeof(float));
         for(i = 0; i < outputs; ++i){
             l.scales[i] = 1;
         }
 
-        l.mean = (float*)calloc(outputs, sizeof(float));
-        l.mean_delta = (float*)calloc(outputs, sizeof(float));
-        l.variance = (float*)calloc(outputs, sizeof(float));
-        l.variance_delta = (float*)calloc(outputs, sizeof(float));
+        l.mean = (float*)xcalloc(outputs, sizeof(float));
+        l.mean_delta = (float*)xcalloc(outputs, sizeof(float));
+        l.variance = (float*)xcalloc(outputs, sizeof(float));
+        l.variance_delta = (float*)xcalloc(outputs, sizeof(float));
 
-        l.rolling_mean = (float*)calloc(outputs, sizeof(float));
-        l.rolling_variance = (float*)calloc(outputs, sizeof(float));
+        l.rolling_mean = (float*)xcalloc(outputs, sizeof(float));
+        l.rolling_variance = (float*)xcalloc(outputs, sizeof(float));
 
-        l.x = (float*)calloc(total_batch * outputs, sizeof(float));
-        l.x_norm = (float*)calloc(total_batch * outputs, sizeof(float));
+        l.x = (float*)xcalloc(total_batch * outputs, sizeof(float));
+        l.x_norm = (float*)xcalloc(total_batch * outputs, sizeof(float));
     }
 
 #ifdef GPU
diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c
index 6cbaf1c3911..2fc2164f686 100644
--- a/src/conv_lstm_layer.c
+++ b/src/conv_lstm_layer.c
@@ -65,65 +65,65 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
     l.peephole = peephole;
 
     // U
-    l.uf = (layer*)calloc(1, sizeof(layer));
+    l.uf = (layer*)xcalloc(1, sizeof(layer));
     *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.uf->batch = batch;
     if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
 
-    l.ui = (layer*)calloc(1, sizeof(layer));
+    l.ui = (layer*)xcalloc(1, sizeof(layer));
     *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.ui->batch = batch;
     if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
 
-    l.ug = (layer*)calloc(1, sizeof(layer));
+    l.ug = (layer*)xcalloc(1, sizeof(layer));
     *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.ug->batch = batch;
     if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
 
-    l.uo = (layer*)calloc(1, sizeof(layer));
+    l.uo = (layer*)xcalloc(1, sizeof(layer));
     *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.uo->batch = batch;
     if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
 
 
     // W
-    l.wf = (layer*)calloc(1, sizeof(layer));
+    l.wf = (layer*)xcalloc(1, sizeof(layer));
     *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wf->batch = batch;
     if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
 
-    l.wi = (layer*)calloc(1, sizeof(layer));
+    l.wi = (layer*)xcalloc(1, sizeof(layer));
     *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wi->batch = batch;
     if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
 
-    l.wg = (layer*)calloc(1, sizeof(layer));
+    l.wg = (layer*)xcalloc(1, sizeof(layer));
     *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wg->batch = batch;
     if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
 
-    l.wo = (layer*)calloc(1, sizeof(layer));
+    l.wo = (layer*)xcalloc(1, sizeof(layer));
     *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wo->batch = batch;
     if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size;
 
 
     // V
-    l.vf = (layer*)calloc(1, sizeof(layer));
+    l.vf = (layer*)xcalloc(1, sizeof(layer));
     if (l.peephole) {
         *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
         l.vf->batch = batch;
         if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size;
     }
 
-    l.vi = (layer*)calloc(1, sizeof(layer));
+    l.vi = (layer*)xcalloc(1, sizeof(layer));
     if (l.peephole) {
         *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
         l.vi->batch = batch;
         if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size;
     }
 
-    l.vo = (layer*)calloc(1, sizeof(layer));
+    l.vo = (layer*)xcalloc(1, sizeof(layer));
     if (l.peephole) {
         *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
         l.vo->batch = batch;
@@ -141,30 +141,30 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
 
     assert(l.wo->outputs == l.uo->outputs);
 
-    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
-    //l.state = (float*)calloc(outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float));
+    //l.state = (float*)xcalloc(outputs * batch, sizeof(float));
 
     l.forward = forward_conv_lstm_layer;
     l.update = update_conv_lstm_layer;
     l.backward = backward_conv_lstm_layer;
 
-    l.prev_state_cpu =  (float*)calloc(batch*outputs, sizeof(float));
-    l.prev_cell_cpu =   (float*)calloc(batch*outputs, sizeof(float));
-    l.cell_cpu =        (float*)calloc(batch*outputs*steps, sizeof(float));
-
-    l.f_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.i_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.g_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.o_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.c_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.stored_c_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.h_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.stored_h_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.temp_cpu =        (float*)calloc(batch*outputs, sizeof(float));
-    l.temp2_cpu =       (float*)calloc(batch*outputs, sizeof(float));
-    l.temp3_cpu =       (float*)calloc(batch*outputs, sizeof(float));
-    l.dc_cpu =          (float*)calloc(batch*outputs, sizeof(float));
-    l.dh_cpu =          (float*)calloc(batch*outputs, sizeof(float));
+    l.prev_state_cpu =  (float*)xcalloc(batch*outputs, sizeof(float));
+    l.prev_cell_cpu =   (float*)xcalloc(batch*outputs, sizeof(float));
+    l.cell_cpu =        (float*)xcalloc(batch*outputs*steps, sizeof(float));
+
+    l.f_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.i_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.g_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.o_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.c_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.stored_c_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.h_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.stored_h_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp_cpu =        (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp2_cpu =       (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp3_cpu =       (float*)xcalloc(batch*outputs, sizeof(float));
+    l.dc_cpu =          (float*)xcalloc(batch*outputs, sizeof(float));
+    l.dh_cpu =          (float*)xcalloc(batch*outputs, sizeof(float));
 
 #ifdef GPU
     l.forward_gpu = forward_conv_lstm_layer_gpu;
@@ -275,26 +275,26 @@ void resize_conv_lstm_layer(layer *l, int w, int h)
 
     assert(l->wo->outputs == l->uo->outputs);
 
-    l->output = (float*)realloc(l->output, outputs * batch * steps * sizeof(float));
-    //l->state = (float*)realloc(l->state, outputs * batch * sizeof(float));
-
-    l->prev_state_cpu = (float*)realloc(l->prev_state_cpu, batch*outputs * sizeof(float));
-    l->prev_cell_cpu = (float*)realloc(l->prev_cell_cpu, batch*outputs * sizeof(float));
-    l->cell_cpu = (float*)realloc(l->cell_cpu, batch*outputs*steps * sizeof(float));
-
-    l->f_cpu = (float*)realloc(l->f_cpu, batch*outputs * sizeof(float));
-    l->i_cpu = (float*)realloc(l->i_cpu, batch*outputs * sizeof(float));
-    l->g_cpu = (float*)realloc(l->g_cpu, batch*outputs * sizeof(float));
-    l->o_cpu = (float*)realloc(l->o_cpu, batch*outputs * sizeof(float));
-    l->c_cpu = (float*)realloc(l->c_cpu, batch*outputs * sizeof(float));
-    l->h_cpu = (float*)realloc(l->h_cpu, batch*outputs * sizeof(float));
-    l->temp_cpu = (float*)realloc(l->temp_cpu, batch*outputs * sizeof(float));
-    l->temp2_cpu = (float*)realloc(l->temp2_cpu, batch*outputs * sizeof(float));
-    l->temp3_cpu = (float*)realloc(l->temp3_cpu, batch*outputs * sizeof(float));
-    l->dc_cpu = (float*)realloc(l->dc_cpu, batch*outputs * sizeof(float));
-    l->dh_cpu = (float*)realloc(l->dh_cpu, batch*outputs * sizeof(float));
-    l->stored_c_cpu = (float*)realloc(l->stored_c_cpu, batch*outputs * sizeof(float));
-    l->stored_h_cpu = (float*)realloc(l->stored_h_cpu, batch*outputs * sizeof(float));
+    l->output = (float*)xrealloc(l->output, outputs * batch * steps * sizeof(float));
+    //l->state = (float*)xrealloc(l->state, outputs * batch * sizeof(float));
+
+    l->prev_state_cpu = (float*)xrealloc(l->prev_state_cpu, batch*outputs * sizeof(float));
+    l->prev_cell_cpu = (float*)xrealloc(l->prev_cell_cpu, batch*outputs * sizeof(float));
+    l->cell_cpu = (float*)xrealloc(l->cell_cpu, batch*outputs*steps * sizeof(float));
+
+    l->f_cpu = (float*)xrealloc(l->f_cpu, batch*outputs * sizeof(float));
+    l->i_cpu = (float*)xrealloc(l->i_cpu, batch*outputs * sizeof(float));
+    l->g_cpu = (float*)xrealloc(l->g_cpu, batch*outputs * sizeof(float));
+    l->o_cpu = (float*)xrealloc(l->o_cpu, batch*outputs * sizeof(float));
+    l->c_cpu = (float*)xrealloc(l->c_cpu, batch*outputs * sizeof(float));
+    l->h_cpu = (float*)xrealloc(l->h_cpu, batch*outputs * sizeof(float));
+    l->temp_cpu = (float*)xrealloc(l->temp_cpu, batch*outputs * sizeof(float));
+    l->temp2_cpu = (float*)xrealloc(l->temp2_cpu, batch*outputs * sizeof(float));
+    l->temp3_cpu = (float*)xrealloc(l->temp3_cpu, batch*outputs * sizeof(float));
+    l->dc_cpu = (float*)xrealloc(l->dc_cpu, batch*outputs * sizeof(float));
+    l->dh_cpu = (float*)xrealloc(l->dh_cpu, batch*outputs * sizeof(float));
+    l->stored_c_cpu = (float*)xrealloc(l->stored_c_cpu, batch*outputs * sizeof(float));
+    l->stored_h_cpu = (float*)xrealloc(l->stored_h_cpu, batch*outputs * sizeof(float));
 
 #ifdef GPU
     //if (l->state_gpu) cudaFree(l->state_gpu);
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 6ff5b8b3d4f..c5cf39f9908 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -370,11 +370,11 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
         l.bias_updates = l.share_layer->bias_updates;
     }
     else {
-        l.weights = (float*)calloc(l.nweights, sizeof(float));
-        l.weight_updates = (float*)calloc(l.nweights, sizeof(float));
+        l.weights = (float*)xcalloc(l.nweights, sizeof(float));
+        l.weight_updates = (float*)xcalloc(l.nweights, sizeof(float));
 
-        l.biases = (float*)calloc(n, sizeof(float));
-        l.bias_updates = (float*)calloc(n, sizeof(float));
+        l.biases = (float*)xcalloc(n, sizeof(float));
+        l.bias_updates = (float*)xcalloc(n, sizeof(float));
     }
 
     // float scale = 1./sqrt(size*size*c);
@@ -389,36 +389,36 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     l.inputs = l.w * l.h * l.c;
     l.activation = activation;
 
-    l.output = (float*)calloc(total_batch*l.outputs, sizeof(float));
-    l.delta  = (float*)calloc(total_batch*l.outputs, sizeof(float));
+    l.output = (float*)xcalloc(total_batch*l.outputs, sizeof(float));
+    l.delta  = (float*)xcalloc(total_batch*l.outputs, sizeof(float));
 
     l.forward = forward_convolutional_layer;
     l.backward = backward_convolutional_layer;
     l.update = update_convolutional_layer;
     if(binary){
-        l.binary_weights = (float*)calloc(l.nweights, sizeof(float));
-        l.cweights = (char*)calloc(l.nweights, sizeof(char));
-        l.scales = (float*)calloc(n, sizeof(float));
+        l.binary_weights = (float*)xcalloc(l.nweights, sizeof(float));
+        l.cweights = (char*)xcalloc(l.nweights, sizeof(char));
+        l.scales = (float*)xcalloc(n, sizeof(float));
     }
     if(xnor){
-        l.binary_weights = (float*)calloc(l.nweights, sizeof(float));
-        l.binary_input = (float*)calloc(l.inputs * l.batch, sizeof(float));
+        l.binary_weights = (float*)xcalloc(l.nweights, sizeof(float));
+        l.binary_input = (float*)xcalloc(l.inputs * l.batch, sizeof(float));
 
         int align = 32;// 8;
         int src_align = l.out_h*l.out_w;
         l.bit_align = src_align + (align - src_align % align);
 
-        l.mean_arr = (float*)calloc(l.n, sizeof(float));
+        l.mean_arr = (float*)xcalloc(l.n, sizeof(float));
 
         const size_t new_c = l.c / 32;
         size_t in_re_packed_input_size = new_c * l.w * l.h + 1;
-        l.bin_re_packed_input = (uint32_t*)calloc(in_re_packed_input_size, sizeof(uint32_t));
+        l.bin_re_packed_input = (uint32_t*)xcalloc(in_re_packed_input_size, sizeof(uint32_t));
 
         l.lda_align = 256;  // AVX2
         int k = l.size*l.size*l.c;
         size_t k_aligned = k + (l.lda_align - k%l.lda_align);
         size_t t_bit_input_size = k_aligned * l.bit_align / 8;
-        l.t_bit_input = (char*)calloc(t_bit_input_size, sizeof(char));
+        l.t_bit_input = (char*)xcalloc(t_bit_input_size, sizeof(char));
     }
 
     if(batch_normalize){
@@ -433,36 +433,36 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
             l.rolling_variance = l.share_layer->rolling_variance;
         }
         else {
-            l.scales = (float*)calloc(n, sizeof(float));
-            l.scale_updates = (float*)calloc(n, sizeof(float));
+            l.scales = (float*)xcalloc(n, sizeof(float));
+            l.scale_updates = (float*)xcalloc(n, sizeof(float));
             for (i = 0; i < n; ++i) {
                 l.scales[i] = 1;
             }
 
-            l.mean = (float*)calloc(n, sizeof(float));
-            l.variance = (float*)calloc(n, sizeof(float));
+            l.mean = (float*)xcalloc(n, sizeof(float));
+            l.variance = (float*)xcalloc(n, sizeof(float));
 
-            l.mean_delta = (float*)calloc(n, sizeof(float));
-            l.variance_delta = (float*)calloc(n, sizeof(float));
+            l.mean_delta = (float*)xcalloc(n, sizeof(float));
+            l.variance_delta = (float*)xcalloc(n, sizeof(float));
 
-            l.rolling_mean = (float*)calloc(n, sizeof(float));
-            l.rolling_variance = (float*)calloc(n, sizeof(float));
+            l.rolling_mean = (float*)xcalloc(n, sizeof(float));
+            l.rolling_variance = (float*)xcalloc(n, sizeof(float));
         }
 
-        l.x = (float*)calloc(total_batch * l.outputs, sizeof(float));
-        l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float));
+        l.x = (float*)xcalloc(total_batch * l.outputs, sizeof(float));
+        l.x_norm = (float*)xcalloc(total_batch * l.outputs, sizeof(float));
     }
     if(adam){
         l.adam = 1;
-        l.m = (float*)calloc(l.nweights, sizeof(float));
-        l.v = (float*)calloc(l.nweights, sizeof(float));
-        l.bias_m = (float*)calloc(n, sizeof(float));
-        l.scale_m = (float*)calloc(n, sizeof(float));
-        l.bias_v = (float*)calloc(n, sizeof(float));
-        l.scale_v = (float*)calloc(n, sizeof(float));
+        l.m = (float*)xcalloc(l.nweights, sizeof(float));
+        l.v = (float*)xcalloc(l.nweights, sizeof(float));
+        l.bias_m = (float*)xcalloc(n, sizeof(float));
+        l.scale_m = (float*)xcalloc(n, sizeof(float));
+        l.bias_v = (float*)xcalloc(n, sizeof(float));
+        l.scale_v = (float*)xcalloc(n, sizeof(float));
     }
 
-    if(l.activation == SWISH) l.output_sigmoid = (float*)calloc(total_batch*l.outputs, sizeof(float));
+    if(l.activation == SWISH) l.output_sigmoid = (float*)xcalloc(total_batch*l.outputs, sizeof(float));
 
 #ifdef GPU
     if (l.activation == SWISH) l.output_sigmoid_gpu = cuda_make_array(l.output_sigmoid, total_batch*out_h*out_w*n);
@@ -620,11 +620,11 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
     l->outputs = l->out_h * l->out_w * l->out_c;
     l->inputs = l->w * l->h * l->c;
 
-    l->output = (float*)realloc(l->output, total_batch * l->outputs * sizeof(float));
-    l->delta = (float*)realloc(l->delta, total_batch * l->outputs * sizeof(float));
+    l->output = (float*)xrealloc(l->output, total_batch * l->outputs * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, total_batch * l->outputs * sizeof(float));
     if(l->batch_normalize){
-        l->x = (float*)realloc(l->x, total_batch * l->outputs * sizeof(float));
-        l->x_norm = (float*)realloc(l->x_norm, total_batch * l->outputs * sizeof(float));
+        l->x = (float*)xrealloc(l->x, total_batch * l->outputs * sizeof(float));
+        l->x_norm = (float*)xrealloc(l->x_norm, total_batch * l->outputs * sizeof(float));
     }
 
     if (l->xnor) {
@@ -766,8 +766,8 @@ void binary_align_weights(convolutional_layer *l)
 
     size_t align_weights_size = new_lda * m;
     l->align_bit_weights_size = align_weights_size / 8 + 1;
-    float* align_weights = (float*)calloc(align_weights_size, sizeof(float));
-    l->align_bit_weights = (char*)calloc(l->align_bit_weights_size, sizeof(char));
+    float* align_weights = (float*)xcalloc(align_weights_size, sizeof(float));
+    l->align_bit_weights = (char*)xcalloc(l->align_bit_weights_size, sizeof(char));
 
     size_t i, j;
     // align A without transpose
@@ -1108,8 +1108,8 @@ void assisted_excitation_forward(convolutional_layer l, network_state state)
     //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n",
     //    epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num);
 
-    float *a_avg = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
-    float *g = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
+    float *a_avg = (float *)xcalloc(l.out_w * l.out_h * l.batch, sizeof(float));
+    float *g = (float *)xcalloc(l.out_w * l.out_h * l.batch, sizeof(float));
 
     int b;
     int w, h, c;
@@ -1310,7 +1310,7 @@ void rescale_weights(convolutional_layer l, float scale, float trans)
 
 image *get_weights(convolutional_layer l)
 {
-    image *weights = (image *)calloc(l.n, sizeof(image));
+    image *weights = (image *)xcalloc(l.n, sizeof(image));
     int i;
     for (i = 0; i < l.n; ++i) {
         weights[i] = copy_image(get_convolutional_weight(l, i));
diff --git a/src/cost_layer.c b/src/cost_layer.c
index 3038c3dafcd..ed1cc134479 100644
--- a/src/cost_layer.c
+++ b/src/cost_layer.c
@@ -41,9 +41,9 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca
     l.inputs = inputs;
     l.outputs = inputs;
     l.cost_type = cost_type;
-    l.delta = (float*)calloc(inputs * batch, sizeof(float));
-    l.output = (float*)calloc(inputs * batch, sizeof(float));
-    l.cost = (float*)calloc(1, sizeof(float));
+    l.delta = (float*)xcalloc(inputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(inputs * batch, sizeof(float));
+    l.cost = (float*)xcalloc(1, sizeof(float));
 
     l.forward = forward_cost_layer;
     l.backward = backward_cost_layer;
@@ -61,8 +61,8 @@ void resize_cost_layer(cost_layer *l, int inputs)
 {
     l->inputs = inputs;
     l->outputs = inputs;
-    l->delta = (float*)realloc(l->delta, inputs * l->batch * sizeof(float));
-    l->output = (float*)realloc(l->output, inputs * l->batch * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, inputs * l->batch * sizeof(float));
+    l->output = (float*)xrealloc(l->output, inputs * l->batch * sizeof(float));
 #ifdef GPU
     cuda_free(l->delta_gpu);
     cuda_free(l->output_gpu);
diff --git a/src/crnn_layer.c b/src/crnn_layer.c
index 7609003b4f2..e9f11fde2b7 100644
--- a/src/crnn_layer.c
+++ b/src/crnn_layer.c
@@ -47,19 +47,19 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
     l.hidden = h * w * hidden_filters;
     l.xnor = xnor;
 
-    l.state = (float*)calloc(l.hidden * l.batch * (l.steps + 1), sizeof(float));
+    l.state = (float*)xcalloc(l.hidden * l.batch * (l.steps + 1), sizeof(float));
 
-    l.input_layer = (layer*)calloc(1, sizeof(layer));
+    l.input_layer = (layer*)xcalloc(1, sizeof(layer));
     *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.input_layer->batch = batch;
     if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
 
-    l.self_layer = (layer*)calloc(1, sizeof(layer));
+    l.self_layer = (layer*)xcalloc(1, sizeof(layer));
     *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.self_layer->batch = batch;
     if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
 
-    l.output_layer = (layer*)calloc(1, sizeof(layer));
+    l.output_layer = (layer*)xcalloc(1, sizeof(layer));
     *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.output_layer->batch = batch;
     if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
@@ -121,7 +121,7 @@ void resize_crnn_layer(layer *l, int w, int h)
     assert(l->input_layer->outputs == l->self_layer->outputs);
     assert(l->input_layer->outputs == l->output_layer->inputs);
 
-    l->state = (float*)realloc(l->state, l->batch*l->hidden*(l->steps + 1)*sizeof(float));
+    l->state = (float*)xrealloc(l->state, l->batch*l->hidden*(l->steps + 1)*sizeof(float));
 
 #ifdef GPU
     if (l->state_gpu) cudaFree(l->state_gpu);
diff --git a/src/crop_layer.c b/src/crop_layer.c
index 092237f403d..2d1fafc22aa 100644
--- a/src/crop_layer.c
+++ b/src/crop_layer.c
@@ -1,3 +1,4 @@
+#include "utils.h"
 #include "crop_layer.h"
 #include "dark_cuda.h"
 #include <stdio.h>
@@ -32,7 +33,7 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int
     l.out_c = c;
     l.inputs = l.w * l.h * l.c;
     l.outputs = l.out_w * l.out_h * l.out_c;
-    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(l.outputs * batch, sizeof(float));
     l.forward = forward_crop_layer;
     l.backward = backward_crop_layer;
 
@@ -56,7 +57,7 @@ void resize_crop_layer(layer *l, int w, int h)
     l->inputs = l->w * l->h * l->c;
     l->outputs = l->out_h * l->out_w * l->out_c;
 
-    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
+    l->output = (float*)xrealloc(l->output, l->batch * l->outputs * sizeof(float));
     #ifdef GPU
     cuda_free(l->output_gpu);
     l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
diff --git a/src/dark_cuda.c b/src/dark_cuda.c
index 50ac96e0fd7..3a74e3a5e85 100644
--- a/src/dark_cuda.c
+++ b/src/dark_cuda.c
@@ -257,7 +257,7 @@ void cuda_random(float *x_gpu, size_t n)
 
 float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
 {
-    float* tmp = (float*)calloc(n, sizeof(float));
+    float* tmp = (float*)xcalloc(n, sizeof(float));
     cuda_pull_array(x_gpu, tmp, n);
     //int i;
     //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
diff --git a/src/darknet.c b/src/darknet.c
index 77a89899263..8bb53cbbb4f 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -258,12 +258,12 @@ layer normalize_layer(layer l, int n)
 {
     int j;
     l.batch_normalize=1;
-    l.scales = (float*)calloc(n, sizeof(float));
+    l.scales = (float*)xcalloc(n, sizeof(float));
     for(j = 0; j < n; ++j){
         l.scales[j] = 1;
     }
-    l.rolling_mean = (float*)calloc(n, sizeof(float));
-    l.rolling_variance = (float*)calloc(n, sizeof(float));
+    l.rolling_mean = (float*)xcalloc(n, sizeof(float));
+    l.rolling_variance = (float*)xcalloc(n, sizeof(float));
     return l;
 }
 
diff --git a/src/data.c b/src/data.c
index 24755b875fe..4e265e3e05c 100644
--- a/src/data.c
+++ b/src/data.c
@@ -45,11 +45,11 @@ char **get_sequential_paths(char **paths, int n, int m, int mini_batch, int augm
 {
     int speed = rand_int(1, augment_speed);
     if (speed < 1) speed = 1;
-    char** sequentia_paths = (char**)calloc(n, sizeof(char*));
+    char** sequentia_paths = (char**)xcalloc(n, sizeof(char*));
     int i;
     pthread_mutex_lock(&mutex);
     //printf("n = %d, mini_batch = %d \n", n, mini_batch);
-    unsigned int *start_time_indexes = (unsigned int *)calloc(mini_batch, sizeof(unsigned int));
+    unsigned int *start_time_indexes = (unsigned int *)xcalloc(mini_batch, sizeof(unsigned int));
     for (i = 0; i < mini_batch; ++i) {
         start_time_indexes[i] = random_gen() % m;
         //printf(" start_time_indexes[i] = %u, ", start_time_indexes[i]);
@@ -75,7 +75,7 @@ char **get_sequential_paths(char **paths, int n, int m, int mini_batch, int augm
 
 char **get_random_paths(char **paths, int n, int m)
 {
-    char** random_paths = (char**)calloc(n, sizeof(char*));
+    char** random_paths = (char**)xcalloc(n, sizeof(char*));
     int i;
     pthread_mutex_lock(&mutex);
     //printf("n = %d \n", n);
@@ -94,7 +94,7 @@ char **get_random_paths(char **paths, int n, int m)
 
 char **find_replace_paths(char **paths, int n, char *find, char *replace)
 {
-    char** replace_paths = (char**)calloc(n, sizeof(char*));
+    char** replace_paths = (char**)xcalloc(n, sizeof(char*));
     int i;
     for(i = 0; i < n; ++i){
         char replaced[4096];
@@ -109,7 +109,7 @@ matrix load_image_paths_gray(char **paths, int n, int w, int h)
     int i;
     matrix X;
     X.rows = n;
-    X.vals = (float**)calloc(X.rows, sizeof(float*));
+    X.vals = (float**)xcalloc(X.rows, sizeof(float*));
     X.cols = 0;
 
     for(i = 0; i < n; ++i){
@@ -130,7 +130,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
     int i;
     matrix X;
     X.rows = n;
-    X.vals = (float**)calloc(X.rows, sizeof(float*));
+    X.vals = (float**)xcalloc(X.rows, sizeof(float*));
     X.cols = 0;
 
     for(i = 0; i < n; ++i){
@@ -146,7 +146,7 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
     int i;
     matrix X;
     X.rows = n;
-    X.vals = (float**)calloc(X.rows, sizeof(float*));
+    X.vals = (float**)xcalloc(X.rows, sizeof(float*));
     X.cols = 0;
 
     for(i = 0; i < n; ++i){
@@ -173,7 +173,7 @@ extern int check_mistakes;
 
 box_label *read_boxes(char *filename, int *n)
 {
-    box_label* boxes = (box_label*)calloc(1, sizeof(box_label));
+    box_label* boxes = (box_label*)xcalloc(1, sizeof(box_label));
     FILE *file = fopen(filename, "r");
     if (!file) {
         printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename);
@@ -192,7 +192,7 @@ box_label *read_boxes(char *filename, int *n)
     int id;
     int count = 0;
     while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
-        boxes = (box_label*)realloc(boxes, (count + 1) * sizeof(box_label));
+        boxes = (box_label*)xrealloc(boxes, (count + 1) * sizeof(box_label));
         if(!boxes) {
           error("realloc failed");
         }
@@ -604,7 +604,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*3;
 
 
@@ -658,7 +658,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*6;
 
     int k = 2*(classes);
@@ -667,7 +667,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
         image im1 = load_image_color(paths[i*2],   w, h);
         image im2 = load_image_color(paths[i*2+1], w, h);
 
-        d.X.vals[i] = (float*)calloc(d.X.cols, sizeof(float));
+        d.X.vals[i] = (float*)xcalloc(d.X.cols, sizeof(float));
         memcpy(d.X.vals[i],         im1.data, h*w*3*sizeof(float));
         memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float));
 
@@ -729,7 +729,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
     d.h = h;
 
     d.X.rows = 1;
-    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*3;
 
     int k = (4+classes)*30;
@@ -828,7 +828,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*c;
 
     float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale = 0;
@@ -840,7 +840,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
     for (i_mixup = 0; i_mixup <= mixup; i_mixup++) {
         if (i_mixup) augmentation_calculated = 0;
         for (i = 0; i < n; ++i) {
-            float *truth = (float*)calloc(5 * boxes, sizeof(float));
+            float *truth = (float*)xcalloc(5 * boxes, sizeof(float));
             const char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i];
 
             int flag = (c >= 3);
@@ -1001,7 +1001,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*c;
 
     float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale;
@@ -1013,7 +1013,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
     for (i_mixup = 0; i_mixup <= mixup; i_mixup++) {
         if (i_mixup) augmentation_calculated = 0;
         for (i = 0; i < n; ++i) {
-            float *truth = (float*)calloc(5 * boxes, sizeof(float));
+            float *truth = (float*)xcalloc(5 * boxes, sizeof(float));
             char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i];
 
             image orig = load_image(filename, 0, 0, c);
@@ -1182,7 +1182,7 @@ void *load_thread(void *ptr)
 pthread_t load_data_in_thread(load_args args)
 {
     pthread_t thread;
-    struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
+    struct load_args* ptr = (load_args*)xcalloc(1, sizeof(struct load_args));
     *ptr = args;
     if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed");
     return thread;
@@ -1197,8 +1197,8 @@ void *load_threads(void *ptr)
     data *out = args.d;
     int total = args.n;
     free(ptr);
-    data* buffers = (data*)calloc(args.threads, sizeof(data));
-    pthread_t* threads = (pthread_t*)calloc(args.threads, sizeof(pthread_t));
+    data* buffers = (data*)xcalloc(args.threads, sizeof(data));
+    pthread_t* threads = (pthread_t*)xcalloc(args.threads, sizeof(pthread_t));
     for(i = 0; i < args.threads; ++i){
         args.d = buffers + i;
         args.n = (i+1) * total/args.threads - i * total/args.threads;
@@ -1221,7 +1221,7 @@ void *load_threads(void *ptr)
 pthread_t load_data(load_args args)
 {
     pthread_t thread;
-    struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
+    struct load_args* ptr = (load_args*)xcalloc(1, sizeof(struct load_args));
     *ptr = args;
     if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed");
     return thread;
@@ -1275,11 +1275,11 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
 
     int i;
     d.X.rows = n;
-    d.X.vals = (float**)calloc(n, sizeof(float*));
+    d.X.vals = (float**)xcalloc(n, sizeof(float*));
     d.X.cols = w*h*3;
 
     d.y.rows = n;
-    d.y.vals = (float**)calloc(n, sizeof(float*));
+    d.y.vals = (float**)xcalloc(n, sizeof(float*));
     d.y.cols = w*scale * h*scale * 3;
 
     for(i = 0; i < n; ++i){
@@ -1327,7 +1327,7 @@ matrix concat_matrix(matrix m1, matrix m2)
     matrix m;
     m.cols = m1.cols;
     m.rows = m1.rows+m2.rows;
-    m.vals = (float**)calloc(m1.rows + m2.rows, sizeof(float*));
+    m.vals = (float**)xcalloc(m1.rows + m2.rows, sizeof(float*));
     for(i = 0; i < m1.rows; ++i){
         m.vals[count++] = m1.vals[i];
     }
@@ -1579,8 +1579,8 @@ data get_random_data(data d, int num)
     r.X.cols = d.X.cols;
     r.y.cols = d.y.cols;
 
-    r.X.vals = (float**)calloc(num, sizeof(float*));
-    r.y.vals = (float**)calloc(num, sizeof(float*));
+    r.X.vals = (float**)xcalloc(num, sizeof(float*));
+    r.y.vals = (float**)xcalloc(num, sizeof(float*));
 
     int i;
     for(i = 0; i < num; ++i){
@@ -1593,7 +1593,7 @@ data get_random_data(data d, int num)
 
 data *split_data(data d, int part, int total)
 {
-    data* split = (data*)calloc(2, sizeof(data));
+    data* split = (data*)xcalloc(2, sizeof(data));
     int i;
     int start = part*d.X.rows/total;
     int end = (part+1)*d.X.rows/total;
@@ -1606,10 +1606,10 @@ data *split_data(data d, int part, int total)
     train.X.cols = test.X.cols = d.X.cols;
     train.y.cols = test.y.cols = d.y.cols;
 
-    train.X.vals = (float**)calloc(train.X.rows, sizeof(float*));
-    test.X.vals = (float**)calloc(test.X.rows, sizeof(float*));
-    train.y.vals = (float**)calloc(train.y.rows, sizeof(float*));
-    test.y.vals = (float**)calloc(test.y.rows, sizeof(float*));
+    train.X.vals = (float**)xcalloc(train.X.rows, sizeof(float*));
+    test.X.vals = (float**)xcalloc(test.X.rows, sizeof(float*));
+    train.y.vals = (float**)xcalloc(train.y.rows, sizeof(float*));
+    test.y.vals = (float**)xcalloc(test.y.rows, sizeof(float*));
 
     for(i = 0; i < start; ++i){
         train.X.vals[i] = d.X.vals[i];
diff --git a/src/deconvolutional_layer.c b/src/deconvolutional_layer.c
index a138fb5325f..4f4e4cc22d9 100644
--- a/src/deconvolutional_layer.c
+++ b/src/deconvolutional_layer.c
@@ -57,11 +57,11 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
     l.stride = stride;
     l.size = size;
 
-    l.weights = (float*)calloc(c * n * size * size, sizeof(float));
-    l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
+    l.weights = (float*)xcalloc(c * n * size * size, sizeof(float));
+    l.weight_updates = (float*)xcalloc(c * n * size * size, sizeof(float));
 
-    l.biases = (float*)calloc(n, sizeof(float));
-    l.bias_updates = (float*)calloc(n, sizeof(float));
+    l.biases = (float*)xcalloc(n, sizeof(float));
+    l.bias_updates = (float*)xcalloc(n, sizeof(float));
     float scale = 1./sqrt(size*size*c);
     for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
     for(i = 0; i < n; ++i){
@@ -76,9 +76,9 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
     l.outputs = l.out_w * l.out_h * l.out_c;
     l.inputs = l.w * l.h * l.c;
 
-    l.col_image = (float*)calloc(h * w * size * size * n, sizeof(float));
-    l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
-    l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
+    l.col_image = (float*)xcalloc(h * w * size * size * n, sizeof(float));
+    l.output = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float));
+    l.delta = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float));
 
     l.forward = forward_deconvolutional_layer;
     l.backward = backward_deconvolutional_layer;
@@ -110,11 +110,11 @@ void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w)
     int out_h = deconvolutional_out_height(*l);
     int out_w = deconvolutional_out_width(*l);
 
-    l->col_image = (float*)realloc(l->col_image,
+    l->col_image = (float*)xrealloc(l->col_image,
                                 out_h*out_w*l->size*l->size*l->c*sizeof(float));
-    l->output = (float*)realloc(l->output,
+    l->output = (float*)xrealloc(l->output,
                                 l->batch*out_h * out_w * l->n*sizeof(float));
-    l->delta = (float*)realloc(l->delta,
+    l->delta = (float*)xrealloc(l->delta,
                                 l->batch*out_h * out_w * l->n*sizeof(float));
     #ifdef GPU
     cuda_free(l->col_image_gpu);
diff --git a/src/detection_layer.c b/src/detection_layer.c
index 64d133f964b..3c6528a9b77 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -25,11 +25,11 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
     l.w = side;
     l.h = side;
     assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
-    l.cost = (float*)calloc(1, sizeof(float));
+    l.cost = (float*)xcalloc(1, sizeof(float));
     l.outputs = l.inputs;
     l.truths = l.side*l.side*(1+l.coords+l.classes);
-    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
-    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
+    l.output = (float*)xcalloc(batch * l.outputs, sizeof(float));
+    l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float));
 
     l.forward = forward_detection_layer;
     l.backward = backward_detection_layer;
@@ -182,7 +182,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
         }
 
         if(0){
-            float* costs = (float*)calloc(l.batch * locations * l.n, sizeof(float));
+            float* costs = (float*)xcalloc(l.batch * locations * l.n, sizeof(float));
             for (b = 0; b < l.batch; ++b) {
                 int index = b*l.inputs;
                 for (i = 0; i < locations; ++i) {
@@ -259,11 +259,11 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state)
         return;
     }
 
-    float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
+    float* in_cpu = (float*)xcalloc(l.batch * l.inputs, sizeof(float));
     float *truth_cpu = 0;
     if(state.truth){
         int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
-        truth_cpu = (float*)calloc(num_truth, sizeof(float));
+        truth_cpu = (float*)xcalloc(num_truth, sizeof(float));
         cuda_pull_array(state.truth, truth_cpu, num_truth);
     }
     cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);
diff --git a/src/detector.c b/src/detector.c
index 7a147e24939..c48b6449554 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -60,7 +60,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
     float avg_loss = -1;
-    network* nets = (network*)calloc(ngpus, sizeof(network));
+    network* nets = (network*)xcalloc(ngpus, sizeof(network));
 
     srand(time(0));
     int seed = rand();
@@ -475,7 +475,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
     }
     else {
         if (!outfile) outfile = "comp4_det_test_";
-        fps = (FILE**)calloc(classes, sizeof(FILE*));
+        fps = (FILE**)xcalloc(classes, sizeof(FILE*));
         for (j = 0; j < classes; ++j) {
             snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
             fps[j] = fopen(buff, "w");
@@ -492,11 +492,11 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
 
     int nthreads = 4;
     if (m < 4) nthreads = m;
-    image* val = (image*)calloc(nthreads, sizeof(image));
-    image* val_resized = (image*)calloc(nthreads, sizeof(image));
-    image* buf = (image*)calloc(nthreads, sizeof(image));
-    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
-    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
+    image* val = (image*)xcalloc(nthreads, sizeof(image));
+    image* val_resized = (image*)xcalloc(nthreads, sizeof(image));
+    image* buf = (image*)xcalloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)xcalloc(nthreads, sizeof(image));
+    pthread_t* thr = (pthread_t*)xcalloc(nthreads, sizeof(pthread_t));
 
     load_args args = { 0 };
     args.w = net.w;
@@ -735,11 +735,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
 
     int nthreads = 4;
     if (m < 4) nthreads = m;
-    image* val = (image*)calloc(nthreads, sizeof(image));
-    image* val_resized = (image*)calloc(nthreads, sizeof(image));
-    image* buf = (image*)calloc(nthreads, sizeof(image));
-    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
-    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
+    image* val = (image*)xcalloc(nthreads, sizeof(image));
+    image* val_resized = (image*)xcalloc(nthreads, sizeof(image));
+    image* buf = (image*)xcalloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)xcalloc(nthreads, sizeof(image));
+    pthread_t* thr = (pthread_t*)xcalloc(nthreads, sizeof(pthread_t));
 
     load_args args = { 0 };
     args.w = net.w;
@@ -753,16 +753,16 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     int tp_for_thresh = 0;
     int fp_for_thresh = 0;
 
-    box_prob* detections = (box_prob*)calloc(1, sizeof(box_prob));
+    box_prob* detections = (box_prob*)xcalloc(1, sizeof(box_prob));
     int detections_count = 0;
     int unique_truth_count = 0;
 
-    int* truth_classes_count = (int*)calloc(classes, sizeof(int));
+    int* truth_classes_count = (int*)xcalloc(classes, sizeof(int));
 
     // For multi-class precision and recall computation
-    float *avg_iou_per_class = (float*)calloc(classes, sizeof(float));
-    int *tp_for_thresh_per_class = (int*)calloc(classes, sizeof(int));
-    int *fp_for_thresh_per_class = (int*)calloc(classes, sizeof(int));
+    float *avg_iou_per_class = (float*)xcalloc(classes, sizeof(float));
+    int *tp_for_thresh_per_class = (int*)xcalloc(classes, sizeof(int));
+    int *fp_for_thresh_per_class = (int*)xcalloc(classes, sizeof(int));
 
     for (t = 0; t < nthreads; ++t) {
         args.path = paths[i + t];
@@ -834,7 +834,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
                     float prob = dets[i].prob[class_id];
                     if (prob > 0) {
                         detections_count++;
-                        detections = (box_prob*)realloc(detections, detections_count * sizeof(box_prob));
+                        detections = (box_prob*)xrealloc(detections, detections_count * sizeof(box_prob));
                         if (!detections) {
                           error("realloc failed");
                         }
@@ -945,19 +945,19 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     } pr_t;
 
     // for PR-curve
-    pr_t** pr = (pr_t**)calloc(classes, sizeof(pr_t*));
+    pr_t** pr = (pr_t**)xcalloc(classes, sizeof(pr_t*));
     for (i = 0; i < classes; ++i) {
-        pr[i] = (pr_t*)calloc(detections_count, sizeof(pr_t));
+        pr[i] = (pr_t*)xcalloc(detections_count, sizeof(pr_t));
     }
     printf("\n detections_count = %d, unique_truth_count = %d  \n", detections_count, unique_truth_count);
 
 
-    int* detection_per_class_count = (int*)calloc(classes, sizeof(int));
+    int* detection_per_class_count = (int*)xcalloc(classes, sizeof(int));
     for (j = 0; j < detections_count; ++j) {
         detection_per_class_count[detections[j].class_id]++;
     }
 
-    int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int));
+    int* truth_flags = (int*)xcalloc(unique_truth_count, sizeof(int));
 
     int rank;
     for (rank = 0; rank < detections_count; ++rank) {
@@ -1155,7 +1155,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
     }
 
     //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 };
-    float* rel_width_height_array = (float*)calloc(1000, sizeof(float));
+    float* rel_width_height_array = (float*)xcalloc(1000, sizeof(float));
 
 
     list *options = read_data_cfg(datacfg);
@@ -1191,7 +1191,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
                 if (check_mistakes) getchar();
             }
             number_of_boxes++;
-            rel_width_height_array = (float*)realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
+            rel_width_height_array = (float*)xrealloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
             if (!rel_width_height_array) {
               error("realloc failed");
             }
@@ -1354,7 +1354,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
 
         //box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
         //float **probs = calloc(l.w*l.h*l.n, sizeof(float*));
-        //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
+        //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)xcalloc(l.classes, sizeof(float));
 
         float *X = sized.data;
 
@@ -1490,7 +1490,7 @@ void run_detector(int argc, char **argv)
         for (i = 0; i < len; ++i) {
             if (gpu_list[i] == ',') ++ngpus;
         }
-        gpus = (int*)calloc(ngpus, sizeof(int));
+        gpus = (int*)xcalloc(ngpus, sizeof(int));
         for (i = 0; i < ngpus; ++i) {
             gpus[i] = atoi(gpu_list);
             gpu_list = strchr(gpu_list, ',') + 1;
diff --git a/src/dropout_layer.c b/src/dropout_layer.c
index 0d34ed24b2c..54e489fcc3c 100644
--- a/src/dropout_layer.c
+++ b/src/dropout_layer.c
@@ -12,7 +12,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
     l.inputs = inputs;
     l.outputs = inputs;
     l.batch = batch;
-    l.rand = (float*)calloc(inputs * batch, sizeof(float));
+    l.rand = (float*)xcalloc(inputs * batch, sizeof(float));
     l.scale = 1./(1.-probability);
     l.forward = forward_dropout_layer;
     l.backward = backward_dropout_layer;
@@ -27,7 +27,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
 
 void resize_dropout_layer(dropout_layer *l, int inputs)
 {
-    l->rand = (float*)realloc(l->rand, l->inputs * l->batch * sizeof(float));
+    l->rand = (float*)xrealloc(l->rand, l->inputs * l->batch * sizeof(float));
     #ifdef GPU
     cuda_free(l->rand_gpu);
 
diff --git a/src/gemm.c b/src/gemm.c
index abeae31cd71..1fec9322905 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -49,7 +49,7 @@ void gemm_bin(int M, int N, int K, float ALPHA,
 float *random_matrix(int rows, int cols)
 {
     int i;
-    float* m = (float*)calloc(rows * cols, sizeof(float));
+    float* m = (float*)xcalloc(rows * cols, sizeof(float));
     for(i = 0; i < rows*cols; ++i){
         m[i] = (float)rand()/RAND_MAX;
     }
@@ -2370,7 +2370,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size)
     memset(dst, 0, dst_size);
 
     size_t i;
-    char* byte_arr = (char*)calloc(size, sizeof(char));
+    char* byte_arr = (char*)xcalloc(size, sizeof(char));
     for (i = 0; i < size; ++i) {
         if (src[i] > 0) byte_arr[i] = 1;
     }
diff --git a/src/go.c b/src/go.c
index faa48d10d45..92f2f0618cc 100644
--- a/src/go.c
+++ b/src/go.c
@@ -32,21 +32,21 @@ moves load_go_moves(char *filename)
 {
     moves m;
     m.n = 128;
-    m.data = (char**)calloc(128, sizeof(char*));
+    m.data = (char**)xcalloc(128, sizeof(char*));
     FILE *fp = fopen(filename, "rb");
     int count = 0;
     char *line = 0;
     while((line = fgetgo(fp))){
         if(count >= m.n){
             m.n *= 2;
-            m.data = (char**)realloc(m.data, m.n * sizeof(char*));
+            m.data = (char**)xrealloc(m.data, m.n * sizeof(char*));
         }
         m.data[count] = line;
         ++count;
     }
     printf("%d\n", count);
     m.n = count;
-    m.data = (char**)realloc(m.data, count * sizeof(char*));
+    m.data = (char**)xrealloc(m.data, count * sizeof(char*));
     return m;
 }
 
@@ -127,11 +127,11 @@ void train_go(char *cfgfile, char *weightfile)
     char* backup_directory = "backup/";
 
     char buff[256];
-    float* board = (float*)calloc(19 * 19 * net.batch, sizeof(float));
+    float* board = (float*)xcalloc(19 * 19 * net.batch, sizeof(float));
     if(!board) {
         error("calloc failed");
     }
-    float* move = (float*)calloc(19 * 19 * net.batch, sizeof(float));
+    float* move = (float*)xcalloc(19 * 19 * net.batch, sizeof(float));
     if(!move) {
         error("calloc failed");
     }
@@ -192,7 +192,7 @@ void propagate_liberty(float *board, int *lib, int *visited, int row, int col, i
 
 int *calculate_liberties(float *board)
 {
-    int* lib = (int*)calloc(19 * 19, sizeof(int));
+    int* lib = (int*)xcalloc(19 * 19, sizeof(int));
     int visited[361];
     int i, j;
     for(j = 0; j < 19; ++j){
@@ -416,11 +416,11 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
     set_batch_network(&net, 1);
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
 
-    float* board = (float*)calloc(19 * 19, sizeof(float));
+    float* board = (float*)xcalloc(19 * 19, sizeof(float));
     if(!board) {
         error("calloc failed");
     }
-    float* move = (float*)calloc(19 * 19, sizeof(float));
+    float* move = (float*)xcalloc(19 * 19, sizeof(float));
     if(!move) {
         error("calloc failed");
     }
@@ -452,9 +452,9 @@ void engine_go(char *filename, char *weightfile, int multi)
     }
     srand(time(0));
     set_batch_network(&net, 1);
-    float* board = (float*)calloc(19 * 19, sizeof(float));
-    char* one = (char*)calloc(91, sizeof(char));
-    char* two = (char*)calloc(91, sizeof(char));
+    float* board = (float*)xcalloc(19 * 19, sizeof(float));
+    char* one = (char*)xcalloc(91, sizeof(char));
+    char* two = (char*)xcalloc(91, sizeof(char));
     int passed = 0;
     while(1){
         char buff[256];
@@ -625,8 +625,8 @@ void test_go(char *cfg, char *weights, int multi)
     }
     srand(time(0));
     set_batch_network(&net, 1);
-    float* board = (float*)calloc(19 * 19, sizeof(float));
-    float* move = (float*)calloc(19 * 19, sizeof(float));
+    float* board = (float*)xcalloc(19 * 19, sizeof(float));
+    float* move = (float*)xcalloc(19 * 19, sizeof(float));
     int color = 1;
     while(1){
         float *output = network_predict(net, board);
@@ -777,15 +777,15 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
     int count = 0;
     set_batch_network(&net, 1);
     set_batch_network(&net2, 1);
-    float* board = (float*)calloc(19 * 19, sizeof(float));
+    float* board = (float*)xcalloc(19 * 19, sizeof(float));
     if(!board) {
         error("calloc failed");
     }
-    char* one = (char*)calloc(91, sizeof(char));
+    char* one = (char*)xcalloc(91, sizeof(char));
     if(!one) {
         error("calloc failed");
     }
-    char* two = (char*)calloc(91, sizeof(char));
+    char* two = (char*)xcalloc(91, sizeof(char));
     if(!two) {
         error("calloc failed");
     }
diff --git a/src/gru_layer.c b/src/gru_layer.c
index 29acdaa2027..b24bee94ac4 100644
--- a/src/gru_layer.c
+++ b/src/gru_layer.c
@@ -74,16 +74,16 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
 
 
     l.outputs = outputs;
-    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
-    l.delta = (float*)calloc(outputs * batch * steps, sizeof(float));
-    l.state = (float*)calloc(outputs * batch, sizeof(float));
-    l.prev_state = (float*)calloc(outputs * batch, sizeof(float));
-    l.forgot_state = (float*)calloc(outputs * batch, sizeof(float));
-    l.forgot_delta = (float*)calloc(outputs * batch, sizeof(float));
-
-    l.r_cpu = (float*)calloc(outputs * batch, sizeof(float));
-    l.z_cpu = (float*)calloc(outputs * batch, sizeof(float));
-    l.h_cpu = (float*)calloc(outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float));
+    l.delta = (float*)xcalloc(outputs * batch * steps, sizeof(float));
+    l.state = (float*)xcalloc(outputs * batch, sizeof(float));
+    l.prev_state = (float*)xcalloc(outputs * batch, sizeof(float));
+    l.forgot_state = (float*)xcalloc(outputs * batch, sizeof(float));
+    l.forgot_delta = (float*)xcalloc(outputs * batch, sizeof(float));
+
+    l.r_cpu = (float*)xcalloc(outputs * batch, sizeof(float));
+    l.z_cpu = (float*)xcalloc(outputs * batch, sizeof(float));
+    l.h_cpu = (float*)xcalloc(outputs * batch, sizeof(float));
 
     l.forward = forward_gru_layer;
     l.backward = backward_gru_layer;
diff --git a/src/image.c b/src/image.c
index 2abd9f10815..57e26fe7f1e 100644
--- a/src/image.c
+++ b/src/image.c
@@ -249,9 +249,9 @@ image **load_alphabet()
 {
     int i, j;
     const int nsize = 8;
-    image** alphabets = (image**)calloc(nsize, sizeof(image*));
+    image** alphabets = (image**)xcalloc(nsize, sizeof(image*));
     for(j = 0; j < nsize; ++j){
-        alphabets[j] = (image*)calloc(128, sizeof(image));
+        alphabets[j] = (image*)xcalloc(128, sizeof(image));
         for(i = 32; i < 127; ++i){
             char buff[256];
             sprintf(buff, "data/labels/%d_%d.png", i, j);
@@ -267,7 +267,7 @@ image **load_alphabet()
 detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names)
 {
     int selected_num = 0;
-    detection_with_class* result_arr = (detection_with_class*)calloc(dets_num, sizeof(detection_with_class));
+    detection_with_class* result_arr = (detection_with_class*)xcalloc(dets_num, sizeof(detection_with_class));
     int i;
     for (i = 0; i < dets_num; ++i) {
         int best_class = -1;
@@ -619,8 +619,8 @@ void normalize_image(image p)
 
 void normalize_image2(image p)
 {
-    float* min = (float*)calloc(p.c, sizeof(float));
-    float* max = (float*)calloc(p.c, sizeof(float));
+    float* min = (float*)xcalloc(p.c, sizeof(float));
+    float* max = (float*)xcalloc(p.c, sizeof(float));
     int i,j;
     for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w];
 
@@ -649,7 +649,7 @@ void normalize_image2(image p)
 image copy_image(image p)
 {
     image copy = p;
-    copy.data = (float*)calloc(p.h * p.w * p.c, sizeof(float));
+    copy.data = (float*)xcalloc(p.h * p.w * p.c, sizeof(float));
     memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
     return copy;
 }
@@ -679,7 +679,7 @@ void save_image_png(image im, const char *name)
     char buff[256];
     //sprintf(buff, "%s (%d)", name, windows);
     sprintf(buff, "%s.png", name);
-    unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
+    unsigned char* data = (unsigned char*)xcalloc(im.w * im.h * im.c, sizeof(unsigned char));
     int i,k;
     for(k = 0; k < im.c; ++k){
         for(i = 0; i < im.w*im.h; ++i){
@@ -700,7 +700,7 @@ void save_image_options(image im, const char *name, IMTYPE f, int quality)
     else if (f == TGA) sprintf(buff, "%s.tga", name);
     else if (f == JPG) sprintf(buff, "%s.jpg", name);
     else               sprintf(buff, "%s.png", name);
-    unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
+    unsigned char* data = (unsigned char*)xcalloc(im.w * im.h * im.c, sizeof(unsigned char));
     int i, k;
     for (k = 0; k < im.c; ++k) {
         for (i = 0; i < im.w*im.h; ++i) {
@@ -758,14 +758,14 @@ image make_empty_image(int w, int h, int c)
 image make_image(int w, int h, int c)
 {
     image out = make_empty_image(w,h,c);
-    out.data = (float*)calloc(h * w * c, sizeof(float));
+    out.data = (float*)xcalloc(h * w * c, sizeof(float));
     return out;
 }
 
 image make_random_image(int w, int h, int c)
 {
     image out = make_empty_image(w,h,c);
-    out.data = (float*)calloc(h * w * c, sizeof(float));
+    out.data = (float*)xcalloc(h * w * c, sizeof(float));
     int i;
     for(i = 0; i < w*h*c; ++i){
         out.data[i] = (rand_normal() * .25) + .5;
diff --git a/src/list.c b/src/list.c
index 6ba093022f6..580826d65ac 100644
--- a/src/list.c
+++ b/src/list.c
@@ -93,7 +93,7 @@ void free_list_contents_kvp(list *l)
 
 void **list_to_array(list *l)
 {
-    void** a = (void**)calloc(l->size, sizeof(void*));
+    void** a = (void**)xcalloc(l->size, sizeof(void*));
     int count = 0;
     node *n = l->front;
     while(n){
diff --git a/src/local_layer.c b/src/local_layer.c
index 9c68e9d897c..83b43c9a1b2 100644
--- a/src/local_layer.c
+++ b/src/local_layer.c
@@ -47,19 +47,19 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = l.w * l.h * l.c;
 
-    l.weights = (float*)calloc(c * n * size * size * locations, sizeof(float));
-    l.weight_updates = (float*)calloc(c * n * size * size * locations, sizeof(float));
+    l.weights = (float*)xcalloc(c * n * size * size * locations, sizeof(float));
+    l.weight_updates = (float*)xcalloc(c * n * size * size * locations, sizeof(float));
 
-    l.biases = (float*)calloc(l.outputs, sizeof(float));
-    l.bias_updates = (float*)calloc(l.outputs, sizeof(float));
+    l.biases = (float*)xcalloc(l.outputs, sizeof(float));
+    l.bias_updates = (float*)xcalloc(l.outputs, sizeof(float));
 
     // float scale = 1./sqrt(size*size*c);
     float scale = sqrt(2./(size*size*c));
     for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
 
-    l.col_image = (float*)calloc(out_h * out_w * size * size * c, sizeof(float));
-    l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
-    l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
+    l.col_image = (float*)xcalloc(out_h * out_w * size * size * c, sizeof(float));
+    l.output = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float));
+    l.delta = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float));
 
     l.forward = forward_local_layer;
     l.backward = backward_local_layer;
diff --git a/src/lstm_layer.c b/src/lstm_layer.c
index 94664ce3aa1..e0cf77972b5 100644
--- a/src/lstm_layer.c
+++ b/src/lstm_layer.c
@@ -39,49 +39,49 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
     l.out_h = 1;
     l.out_c = outputs;
 
-    l.uf = (layer*)calloc(1, sizeof(layer));
+    l.uf = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.uf->batch = batch;
     if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
 
-    l.ui = (layer*)calloc(1, sizeof(layer));
+    l.ui = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.ui->batch = batch;
     if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
 
-    l.ug = (layer*)calloc(1, sizeof(layer));
+    l.ug = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.ug->batch = batch;
     if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
 
-    l.uo = (layer*)calloc(1, sizeof(layer));
+    l.uo = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.uo->batch = batch;
     if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
 
-    l.wf = (layer*)calloc(1, sizeof(layer));
+    l.wf = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wf->batch = batch;
     if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
 
-    l.wi = (layer*)calloc(1, sizeof(layer));
+    l.wi = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wi->batch = batch;
     if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
 
-    l.wg = (layer*)calloc(1, sizeof(layer));
+    l.wg = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wg->batch = batch;
     if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
 
-    l.wo = (layer*)calloc(1, sizeof(layer));
+    l.wo = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wo->batch = batch;
@@ -90,28 +90,28 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
     l.batch_normalize = batch_normalize;
     l.outputs = outputs;
 
-    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
-    l.state = (float*)calloc(outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float));
+    l.state = (float*)xcalloc(outputs * batch, sizeof(float));
 
     l.forward = forward_lstm_layer;
     l.update = update_lstm_layer;
     l.backward = backward_lstm_layer;
 
-    l.prev_state_cpu =  (float*)calloc(batch*outputs, sizeof(float));
-    l.prev_cell_cpu =   (float*)calloc(batch*outputs, sizeof(float));
-    l.cell_cpu =        (float*)calloc(batch*outputs*steps, sizeof(float));
-
-    l.f_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.i_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.g_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.o_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.c_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.h_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.temp_cpu =        (float*)calloc(batch*outputs, sizeof(float));
-    l.temp2_cpu =       (float*)calloc(batch*outputs, sizeof(float));
-    l.temp3_cpu =       (float*)calloc(batch*outputs, sizeof(float));
-    l.dc_cpu =          (float*)calloc(batch*outputs, sizeof(float));
-    l.dh_cpu =          (float*)calloc(batch*outputs, sizeof(float));
+    l.prev_state_cpu =  (float*)xcalloc(batch*outputs, sizeof(float));
+    l.prev_cell_cpu =   (float*)xcalloc(batch*outputs, sizeof(float));
+    l.cell_cpu =        (float*)xcalloc(batch*outputs*steps, sizeof(float));
+
+    l.f_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.i_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.g_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.o_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.c_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.h_cpu =           (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp_cpu =        (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp2_cpu =       (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp3_cpu =       (float*)xcalloc(batch*outputs, sizeof(float));
+    l.dc_cpu =          (float*)xcalloc(batch*outputs, sizeof(float));
+    l.dh_cpu =          (float*)xcalloc(batch*outputs, sizeof(float));
 
 #ifdef GPU
     l.forward_gpu = forward_lstm_layer_gpu;
diff --git a/src/matrix.c b/src/matrix.c
index aceca6e64ac..715ee80c9dd 100644
--- a/src/matrix.c
+++ b/src/matrix.c
@@ -15,7 +15,7 @@ void free_matrix(matrix m)
 
 float matrix_topk_accuracy(matrix truth, matrix guess, int k)
 {
-    int* indexes = (int*)calloc(k, sizeof(int));
+    int* indexes = (int*)xcalloc(k, sizeof(int));
     int n = truth.cols;
     int i,j;
     int correct = 0;
@@ -48,15 +48,15 @@ matrix resize_matrix(matrix m, int size)
     int i;
     if (m.rows == size) return m;
     if (m.rows < size) {
-        m.vals = (float**)realloc(m.vals, size * sizeof(float*));
+        m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
         for (i = m.rows; i < size; ++i) {
-            m.vals[i] = (float*)calloc(m.cols, sizeof(float));
+            m.vals[i] = (float*)xcalloc(m.cols, sizeof(float));
         }
     } else if (m.rows > size) {
         for (i = size; i < m.rows; ++i) {
             free(m.vals[i]);
         }
-        m.vals = (float**)realloc(m.vals, size * sizeof(float*));
+        m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
     }
     m.rows = size;
     return m;
@@ -79,9 +79,9 @@ matrix make_matrix(int rows, int cols)
     matrix m;
     m.rows = rows;
     m.cols = cols;
-    m.vals = (float**)calloc(m.rows, sizeof(float*));
+    m.vals = (float**)xcalloc(m.rows, sizeof(float*));
     for(i = 0; i < m.rows; ++i){
-        m.vals[i] = (float*)calloc(m.cols, sizeof(float));
+        m.vals[i] = (float*)xcalloc(m.cols, sizeof(float));
     }
     return m;
 }
@@ -92,7 +92,7 @@ matrix hold_out_matrix(matrix *m, int n)
     matrix h;
     h.rows = n;
     h.cols = m->cols;
-    h.vals = (float**)calloc(h.rows, sizeof(float*));
+    h.vals = (float**)xcalloc(h.rows, sizeof(float*));
     for(i = 0; i < n; ++i){
         int index = rand()%m->rows;
         h.vals[i] = m->vals[index];
@@ -103,7 +103,7 @@ matrix hold_out_matrix(matrix *m, int n)
 
 float *pop_column(matrix *m, int c)
 {
-    float* col = (float*)calloc(m->rows, sizeof(float));
+    float* col = (float*)xcalloc(m->rows, sizeof(float));
     int i, j;
     for(i = 0; i < m->rows; ++i){
         col[i] = m->vals[i][c];
@@ -127,18 +127,18 @@ matrix csv_to_matrix(char *filename)
 
     int n = 0;
     int size = 1024;
-    m.vals = (float**)calloc(size, sizeof(float*));
+    m.vals = (float**)xcalloc(size, sizeof(float*));
     while((line = fgetl(fp))){
         if(m.cols == -1) m.cols = count_fields(line);
         if(n == size){
             size *= 2;
-            m.vals = (float**)realloc(m.vals, size * sizeof(float*));
+            m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
         }
         m.vals[n] = parse_fields(line, m.cols);
         free(line);
         ++n;
     }
-    m.vals = (float**)realloc(m.vals, n * sizeof(float*));
+    m.vals = (float**)xrealloc(m.vals, n * sizeof(float*));
     m.rows = n;
     return m;
 }
@@ -225,7 +225,7 @@ void kmeans_maximization(matrix data, int *assignments, matrix centers)
     matrix old_centers = make_matrix(centers.rows, centers.cols);
 
     int i, j;
-    int *counts = (int*)calloc(centers.rows, sizeof(int));
+    int *counts = (int*)xcalloc(centers.rows, sizeof(int));
     for (i = 0; i < centers.rows; ++i) {
         for (j = 0; j < centers.cols; ++j) {
             old_centers.vals[i][j] = centers.vals[i][j];
@@ -269,7 +269,7 @@ void random_centers(matrix data, matrix centers) {
 int *sample(int n)
 {
     int i;
-    int* s = (int*)calloc(n, sizeof(int));
+    int* s = (int*)xcalloc(n, sizeof(int));
     for (i = 0; i < n; ++i) s[i] = i;
     for (i = n - 1; i >= 0; --i) {
         int swap = s[i];
@@ -301,7 +301,7 @@ void copy(float *x, float *y, int n)
 model do_kmeans(matrix data, int k)
 {
     matrix centers = make_matrix(k, data.cols);
-    int* assignments = (int*)calloc(data.rows, sizeof(int));
+    int* assignments = (int*)xcalloc(data.rows, sizeof(int));
     //smart_centers(data, centers);
     random_centers(data, centers);  // IoU = 67.31% after kmeans
 
diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c
index 4d2ee49f57f..797123f664a 100644
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@@ -71,9 +71,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
     l.size = size;
     l.stride = stride;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.indexes = (int*)calloc(output_size, sizeof(int));
-    l.output = (float*)calloc(output_size, sizeof(float));
-    l.delta = (float*)calloc(output_size, sizeof(float));
+    l.indexes = (int*)xcalloc(output_size, sizeof(int));
+    l.output = (float*)xcalloc(output_size, sizeof(float));
+    l.delta = (float*)xcalloc(output_size, sizeof(float));
     l.forward = forward_maxpool_layer;
     l.backward = backward_maxpool_layer;
     #ifdef GPU
@@ -102,9 +102,9 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
     l->outputs = l->out_w * l->out_h * l->out_c;
     int output_size = l->outputs * l->batch;
 
-    l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int));
-    l->output = (float*)realloc(l->output, output_size * sizeof(float));
-    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
+    l->indexes = (int*)xrealloc(l->indexes, output_size * sizeof(int));
+    l->output = (float*)xrealloc(l->output, output_size * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, output_size * sizeof(float));
 
 #ifdef GPU
     CHECK_CUDA(cudaFree((float *)l->indexes_gpu));
diff --git a/src/network.c b/src/network.c
index fddfeb8a5ed..403c1ca0d08 100644
--- a/src/network.c
+++ b/src/network.c
@@ -232,16 +232,16 @@ network make_network(int n)
 {
     network net = {0};
     net.n = n;
-    net.layers = (layer*)calloc(net.n, sizeof(layer));
-    net.seen = (uint64_t*)calloc(1, sizeof(uint64_t));
+    net.layers = (layer*)xcalloc(net.n, sizeof(layer));
+    net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t));
 #ifdef GPU
-    net.input_gpu = (float**)calloc(1, sizeof(float*));
-    net.truth_gpu = (float**)calloc(1, sizeof(float*));
+    net.input_gpu = (float**)xcalloc(1, sizeof(float*));
+    net.truth_gpu = (float**)xcalloc(1, sizeof(float*));
 
-    net.input16_gpu = (float**)calloc(1, sizeof(float*));
-    net.output16_gpu = (float**)calloc(1, sizeof(float*));
-    net.max_input16_size = (size_t*)calloc(1, sizeof(size_t));
-    net.max_output16_size = (size_t*)calloc(1, sizeof(size_t));
+    net.input16_gpu = (float**)xcalloc(1, sizeof(float*));
+    net.output16_gpu = (float**)xcalloc(1, sizeof(float*));
+    net.max_input16_size = (size_t*)xcalloc(1, sizeof(size_t));
+    net.max_output16_size = (size_t*)xcalloc(1, sizeof(size_t));
 #endif
     return net;
 }
@@ -353,8 +353,8 @@ float train_network_datum(network net, float *x, float *y)
 float train_network_sgd(network net, data d, int n)
 {
     int batch = net.batch;
-    float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
-    float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
+    float* X = (float*)xcalloc(batch * d.X.cols, sizeof(float));
+    float* y = (float*)xcalloc(batch * d.y.cols, sizeof(float));
 
     int i;
     float sum = 0;
@@ -379,8 +379,8 @@ float train_network_waitkey(network net, data d, int wait_key)
     assert(d.X.rows % net.batch == 0);
     int batch = net.batch;
     int n = d.X.rows / batch;
-    float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
-    float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
+    float* X = (float*)xcalloc(batch * d.X.cols, sizeof(float));
+    float* y = (float*)xcalloc(batch * d.y.cols, sizeof(float));
 
     int i;
     float sum = 0;
@@ -450,11 +450,11 @@ int recalculate_workspace_size(network *net)
     }
     else {
         free(net->workspace);
-        net->workspace = (float*)calloc(1, workspace_size);
+        net->workspace = (float*)xcalloc(1, workspace_size);
     }
 #else
     free(net->workspace);
-    net->workspace = (float*)calloc(1, workspace_size);
+    net->workspace = (float*)xcalloc(1, workspace_size);
 #endif
     //fprintf(stderr, " Done!\n");
     return 0;
@@ -565,19 +565,19 @@ int resize_network(network *net, int w, int h)
             net->input_pinned_cpu_flag = 1;
         else {
             cudaGetLastError(); // reset CUDA-error
-            net->input_pinned_cpu = (float*)calloc(size, sizeof(float));
+            net->input_pinned_cpu = (float*)xcalloc(size, sizeof(float));
             net->input_pinned_cpu_flag = 0;
         }
         printf(" CUDA allocate done! \n");
     }else {
         free(net->workspace);
-        net->workspace = (float*)calloc(1, workspace_size);
+        net->workspace = (float*)xcalloc(1, workspace_size);
         if(!net->input_pinned_cpu_flag)
-            net->input_pinned_cpu = (float*)realloc(net->input_pinned_cpu, size * sizeof(float));
+            net->input_pinned_cpu = (float*)xrealloc(net->input_pinned_cpu, size * sizeof(float));
     }
 #else
     free(net->workspace);
-    net->workspace = (float*)calloc(1, workspace_size);
+    net->workspace = (float*)xcalloc(1, workspace_size);
 #endif
     //fprintf(stderr, " Done!\n");
     return 0;
@@ -702,11 +702,11 @@ detection *make_network_boxes(network *net, float thresh, int *num)
     int i;
     int nboxes = num_detections(net, thresh);
     if (num) *num = nboxes;
-    detection* dets = (detection*)calloc(nboxes, sizeof(detection));
+    detection* dets = (detection*)xcalloc(nboxes, sizeof(detection));
     for (i = 0; i < nboxes; ++i) {
-        dets[i].prob = (float*)calloc(l.classes, sizeof(float));
+        dets[i].prob = (float*)xcalloc(l.classes, sizeof(float));
         if (l.coords > 4) {
-            dets[i].mask = (float*)calloc(l.coords - 4, sizeof(float));
+            dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float));
         }
     }
     return dets;
@@ -715,10 +715,10 @@ detection *make_network_boxes(network *net, float thresh, int *num)
 
 void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter)
 {
-    box* boxes = (box*)calloc(l.w * l.h * l.n, sizeof(box));
-    float** probs = (float**)calloc(l.w * l.h * l.n, sizeof(float*));
+    box* boxes = (box*)xcalloc(l.w * l.h * l.n, sizeof(box));
+    float** probs = (float**)xcalloc(l.w * l.h * l.n, sizeof(float*));
     int i, j;
-    for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
+    for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)xcalloc(l.classes, sizeof(float));
     get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
     for (j = 0; j < l.w*l.h*l.n; ++j) {
         dets[j].classes = l.classes;
@@ -793,7 +793,7 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names,
 {
     const float thresh = 0.005; // function get_network_boxes() has already filtred dets by actual threshold
 
-    char *send_buf = (char *)calloc(1024, sizeof(char));
+    char *send_buf = (char *)xcalloc(1024, sizeof(char));
     if (filename) {
         sprintf(send_buf, "{\n \"frame_id\":%lld, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename);
     }
@@ -810,7 +810,7 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names,
             {
                 if (class_id != -1) strcat(send_buf, ", \n");
                 class_id = j;
-                char *buf = (char *)calloc(2048, sizeof(char));
+                char *buf = (char *)xcalloc(2048, sizeof(char));
                 //sprintf(buf, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f}",
                 //    image_id, j, dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h, dets[i].prob[j]);
 
@@ -820,7 +820,7 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names,
                 int send_buf_len = strlen(send_buf);
                 int buf_len = strlen(buf);
                 int total_len = send_buf_len + buf_len + 100;
-                send_buf = (char *)realloc(send_buf, total_len * sizeof(char));
+                send_buf = (char *)xrealloc(send_buf, total_len * sizeof(char));
                 if (!send_buf) {
                   error("realloc failed");
                 }
@@ -878,7 +878,7 @@ matrix network_predict_data_multi(network net, data test, int n)
     int i,j,b,m;
     int k = get_network_output_size(net);
     matrix pred = make_matrix(test.X.rows, k);
-    float* X = (float*)calloc(net.batch * test.X.rows, sizeof(float));
+    float* X = (float*)xcalloc(net.batch * test.X.rows, sizeof(float));
     for(i = 0; i < test.X.rows; i += net.batch){
         for(b = 0; b < net.batch; ++b){
             if(i+b == test.X.rows) break;
@@ -903,7 +903,7 @@ matrix network_predict_data(network net, data test)
     int i,j,b;
     int k = get_network_output_size(net);
     matrix pred = make_matrix(test.X.rows, k);
-    float* X = (float*)calloc(net.batch * test.X.cols, sizeof(float));
+    float* X = (float*)xcalloc(net.batch * test.X.cols, sizeof(float));
     for(i = 0; i < test.X.rows; i += net.batch){
         for(b = 0; b < net.batch; ++b){
             if(i+b == test.X.rows) break;
diff --git a/src/normalization_layer.c b/src/normalization_layer.c
index 9d4afcbb83d..d8a2ac588bf 100644
--- a/src/normalization_layer.c
+++ b/src/normalization_layer.c
@@ -15,10 +15,10 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a
     layer.size = size;
     layer.alpha = alpha;
     layer.beta = beta;
-    layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.squared = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.norms = (float*)calloc(h * w * c * batch, sizeof(float));
+    layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.squared = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.norms = (float*)xcalloc(h * w * c * batch, sizeof(float));
     layer.inputs = w*h*c;
     layer.outputs = layer.inputs;
 
@@ -46,10 +46,10 @@ void resize_normalization_layer(layer *layer, int w, int h)
     layer->out_w = w;
     layer->inputs = w*h*c;
     layer->outputs = layer->inputs;
-    layer->output = (float*)realloc(layer->output, h * w * c * batch * sizeof(float));
-    layer->delta = (float*)realloc(layer->delta, h * w * c * batch * sizeof(float));
-    layer->squared = (float*)realloc(layer->squared, h * w * c * batch * sizeof(float));
-    layer->norms = (float*)realloc(layer->norms, h * w * c * batch * sizeof(float));
+    layer->output = (float*)xrealloc(layer->output, h * w * c * batch * sizeof(float));
+    layer->delta = (float*)xrealloc(layer->delta, h * w * c * batch * sizeof(float));
+    layer->squared = (float*)xrealloc(layer->squared, h * w * c * batch * sizeof(float));
+    layer->norms = (float*)xrealloc(layer->norms, h * w * c * batch * sizeof(float));
 #ifdef GPU
     cuda_free(layer->output_gpu);
     cuda_free(layer->delta_gpu);
diff --git a/src/parser.c b/src/parser.c
index 427d8d38706..3d3c3752dec 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -319,7 +319,7 @@ int *parse_yolo_mask(char *a, int *num)
         for (i = 0; i < len; ++i) {
             if (a[i] == ',') ++n;
         }
-        mask = (int*)calloc(n, sizeof(int));
+        mask = (int*)xcalloc(n, sizeof(int));
         for (i = 0; i < n; ++i) {
             int val = atoi(a);
             mask[i] = val;
@@ -680,8 +680,8 @@ route_layer parse_route(list *options, size_params params, network net)
         if (l[i] == ',') ++n;
     }
 
-    int* layers = (int*)calloc(n, sizeof(int));
-    int* sizes = (int*)calloc(n, sizeof(int));
+    int* layers = (int*)xcalloc(n, sizeof(int));
+    int* sizes = (int*)xcalloc(n, sizeof(int));
     for(i = 0; i < n; ++i){
         int index = atoi(l);
         l = strchr(l, ',')+1;
@@ -799,9 +799,9 @@ void parse_net_options(list *options, network *net)
             for (i = 0; i < len; ++i) {
                 if (l[i] == ',') ++n;
             }
-            int* steps = (int*)calloc(n, sizeof(int));
-            float* scales = (float*)calloc(n, sizeof(float));
-            float* seq_scales = (float*)calloc(n, sizeof(float));
+            int* steps = (int*)xcalloc(n, sizeof(int));
+            float* scales = (float*)xcalloc(n, sizeof(float));
+            float* seq_scales = (float*)xcalloc(n, sizeof(float));
             for (i = 0; i < n; ++i) {
                 float scale = 1.0;
                 if (p) {
@@ -1007,7 +1007,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
         else {
             cudaGetLastError(); // reset CUDA-error
-            net.input_pinned_cpu = (float*)calloc(size, sizeof(float));
+            net.input_pinned_cpu = (float*)xcalloc(size, sizeof(float));
         }
 
         // pre-allocate memory for inference on Tensor Cores (fp16)
@@ -1022,12 +1022,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
             net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
         }
         else {
-            net.workspace = (float*)calloc(1, workspace_size);
+            net.workspace = (float*)xcalloc(1, workspace_size);
         }
     }
 #else
         if (workspace_size) {
-            net.workspace = (float*)calloc(1, workspace_size);
+            net.workspace = (float*)xcalloc(1, workspace_size);
         }
 #endif
 
@@ -1248,7 +1248,7 @@ void save_weights(network net, char *filename)
 
 void transpose_matrix(float *a, int rows, int cols)
 {
-    float* transpose = (float*)calloc(rows * cols, sizeof(float));
+    float* transpose = (float*)xcalloc(rows * cols, sizeof(float));
     int x, y;
     for(x = 0; x < rows; ++x){
         for(y = 0; y < cols; ++y){
@@ -1485,7 +1485,7 @@ void load_weights(network *net, char *filename)
 network *load_network_custom(char *cfg, char *weights, int clear, int batch)
 {
     printf(" Try to load cfg: %s, clear = %d \n", cfg, clear);
-    network* net = (network*)calloc(1, sizeof(network));
+    network* net = (network*)xcalloc(1, sizeof(network));
     *net = parse_network_cfg_custom(cfg, batch, 0);
     if (weights && weights[0] != 0) {
         printf(" Try to load weights: %s \n", weights);
@@ -1499,7 +1499,7 @@ network *load_network_custom(char *cfg, char *weights, int clear, int batch)
 network *load_network(char *cfg, char *weights, int clear)
 {
     printf(" Try to load cfg: %s, clear = %d \n", cfg, clear);
-    network* net = (network*)calloc(1, sizeof(network));
+    network* net = (network*)xcalloc(1, sizeof(network));
     *net = parse_network_cfg(cfg);
     if (weights && weights[0] != 0) {
         printf(" Try to load weights: %s \n", weights);
diff --git a/src/region_layer.c b/src/region_layer.c
index 4610d63737d..f062edabe2a 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -22,15 +22,15 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int
     l.w = w;
     l.classes = classes;
     l.coords = coords;
-    l.cost = (float*)calloc(1, sizeof(float));
-    l.biases = (float*)calloc(n * 2, sizeof(float));
-    l.bias_updates = (float*)calloc(n * 2, sizeof(float));
+    l.cost = (float*)xcalloc(1, sizeof(float));
+    l.biases = (float*)xcalloc(n * 2, sizeof(float));
+    l.bias_updates = (float*)xcalloc(n * 2, sizeof(float));
     l.outputs = h*w*n*(classes + coords + 1);
     l.inputs = l.outputs;
     l.max_boxes = max_boxes;
     l.truths = max_boxes*(5);
-    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
-    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
+    l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float));
+    l.output = (float*)xcalloc(batch * l.outputs, sizeof(float));
     int i;
     for(i = 0; i < n*2; ++i){
         l.biases[i] = .5;
@@ -63,8 +63,8 @@ void resize_region_layer(layer *l, int w, int h)
     l->outputs = h*w*l->n*(l->classes + l->coords + 1);
     l->inputs = l->outputs;
 
-    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
-    l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
+    l->output = (float*)xrealloc(l->output, l->batch * l->outputs * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, l->batch * l->outputs * sizeof(float));
 
 #ifdef GPU
     if (old_w < w || old_h < h) {
@@ -446,11 +446,11 @@ void forward_region_layer_gpu(const region_layer l, network_state state)
         softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
     }
 
-    float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
+    float* in_cpu = (float*)xcalloc(l.batch * l.inputs, sizeof(float));
     float *truth_cpu = 0;
     if(state.truth){
         int num_truth = l.batch*l.truths;
-        truth_cpu = (float*)calloc(num_truth, sizeof(float));
+        truth_cpu = (float*)xcalloc(num_truth, sizeof(float));
         cuda_pull_array(state.truth, truth_cpu, num_truth);
     }
     cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs);
diff --git a/src/reorg_layer.c b/src/reorg_layer.c
index 8094a066328..10c11199a0e 100644
--- a/src/reorg_layer.c
+++ b/src/reorg_layer.c
@@ -27,8 +27,8 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = h*w*c;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.output = (float*)calloc(output_size, sizeof(float));
-    l.delta = (float*)calloc(output_size, sizeof(float));
+    l.output = (float*)xcalloc(output_size, sizeof(float));
+    l.delta = (float*)xcalloc(output_size, sizeof(float));
 
     l.forward = forward_reorg_layer;
     l.backward = backward_reorg_layer;
@@ -64,8 +64,8 @@ void resize_reorg_layer(layer *l, int w, int h)
     l->inputs = l->outputs;
     int output_size = l->outputs * l->batch;
 
-    l->output = (float*)realloc(l->output, output_size * sizeof(float));
-    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
+    l->output = (float*)xrealloc(l->output, output_size * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, output_size * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/reorg_old_layer.c b/src/reorg_old_layer.c
index 530da202094..f6ad2a8b251 100644
--- a/src/reorg_old_layer.c
+++ b/src/reorg_old_layer.c
@@ -27,8 +27,8 @@ layer make_reorg_old_layer(int batch, int w, int h, int c, int stride, int rever
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = h*w*c;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.output = (float*)calloc(output_size, sizeof(float));
-    l.delta = (float*)calloc(output_size, sizeof(float));
+    l.output = (float*)xcalloc(output_size, sizeof(float));
+    l.delta = (float*)xcalloc(output_size, sizeof(float));
 
     l.forward = forward_reorg_old_layer;
     l.backward = backward_reorg_old_layer;
@@ -64,8 +64,8 @@ void resize_reorg_old_layer(layer *l, int w, int h)
     l->inputs = l->outputs;
     int output_size = l->outputs * l->batch;
 
-    l->output = (float*)realloc(l->output, output_size * sizeof(float));
-    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
+    l->output = (float*)xrealloc(l->output, output_size * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, output_size * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/rnn.c b/src/rnn.c
index 49cadd342d5..bfc57b709df 100644
--- a/src/rnn.c
+++ b/src/rnn.c
@@ -14,7 +14,7 @@ int *read_tokenized_data(char *filename, size_t *read)
     size_t size = 512;
     size_t count = 0;
     FILE *fp = fopen(filename, "r");
-    int* d = (int*)calloc(size, sizeof(int));
+    int* d = (int*)xcalloc(size, sizeof(int));
     if(!d) {
       error("calloc failed");
     }
@@ -24,7 +24,7 @@ int *read_tokenized_data(char *filename, size_t *read)
         ++count;
         if(count > size){
             size = size*2;
-            d = (int*)realloc(d, size * sizeof(int));
+            d = (int*)xrealloc(d, size * sizeof(int));
             if(!d) {
               error("realloc failed");
             }
@@ -33,7 +33,7 @@ int *read_tokenized_data(char *filename, size_t *read)
         one = fscanf(fp, "%d", &n);
     }
     fclose(fp);
-    d = (int*)realloc(d, count * sizeof(int));
+    d = (int*)xrealloc(d, count * sizeof(int));
     if(!d) {
       error("realloc failed");
     }
@@ -46,13 +46,13 @@ char **read_tokens(char *filename, size_t *read)
     size_t size = 512;
     size_t count = 0;
     FILE *fp = fopen(filename, "r");
-    char** d = (char**)calloc(size, sizeof(char*));
+    char** d = (char**)xcalloc(size, sizeof(char*));
     char *line;
     while((line=fgetl(fp)) != 0){
         ++count;
         if(count > size){
             size = size*2;
-            d = (char**)realloc(d, size * sizeof(char*));
+            d = (char**)xrealloc(d, size * sizeof(char*));
             if(!d) {
               error("realloc failed");
             }
@@ -60,7 +60,7 @@ char **read_tokens(char *filename, size_t *read)
         d[count-1] = line;
     }
     fclose(fp);
-    d = (char**)realloc(d, count * sizeof(char*));
+    d = (char**)xrealloc(d, count * sizeof(char*));
     if(!d) {
       error("realloc failed");
     }
@@ -70,8 +70,8 @@ char **read_tokens(char *filename, size_t *read)
 
 float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps)
 {
-    float* x = (float*)calloc(batch * steps * characters, sizeof(float));
-    float* y = (float*)calloc(batch * steps * characters, sizeof(float));
+    float* x = (float*)xcalloc(batch * steps * characters, sizeof(float));
+    float* y = (float*)xcalloc(batch * steps * characters, sizeof(float));
     int i,j;
     for(i = 0; i < batch; ++i){
         for(j = 0; j < steps; ++j){
@@ -96,8 +96,8 @@ float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size
 
 float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps)
 {
-    float* x = (float*)calloc(batch * steps * characters, sizeof(float));
-    float* y = (float*)calloc(batch * steps * characters, sizeof(float));
+    float* x = (float*)xcalloc(batch * steps * characters, sizeof(float));
+    float* y = (float*)xcalloc(batch * steps * characters, sizeof(float));
     int i,j;
     for(i = 0; i < batch; ++i){
         for(j = 0; j < steps; ++j){
@@ -152,7 +152,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
         size = ftell(fp);
         fseek(fp, 0, SEEK_SET);
 
-        text = (unsigned char *)calloc(size + 1, sizeof(char));
+        text = (unsigned char *)xcalloc(size + 1, sizeof(char));
         fread(text, 1, size, fp);
         fclose(fp);
     }
@@ -176,7 +176,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
     int streams = batch/steps;
     printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %ld \n", batch, steps, streams, net.subdivisions, size);
     printf(" global_batch = %d \n", batch*net.subdivisions);
-    size_t* offsets = (size_t*)calloc(streams, sizeof(size_t));
+    size_t* offsets = (size_t*)xcalloc(streams, sizeof(size_t));
     int j;
     for(j = 0; j < streams; ++j){
         offsets[j] = rand_size_t()%size;
@@ -259,7 +259,7 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
     for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
     int c = 0;
     int len = strlen(seed);
-    float* input = (float*)calloc(inputs, sizeof(float));
+    float* input = (float*)xcalloc(inputs, sizeof(float));
 
     /*
        fill_cpu(inputs, 0, input, 1);
@@ -318,7 +318,7 @@ void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int r
     int i, j;
     for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
     int c = 0;
-    float* input = (float*)calloc(inputs, sizeof(float));
+    float* input = (float*)xcalloc(inputs, sizeof(float));
     float *out = 0;
 
     while((c = getc(stdin)) != EOF){
@@ -357,7 +357,7 @@ void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed)
     int words = 1;
     int c;
     int len = strlen(seed);
-    float* input = (float*)calloc(inputs, sizeof(float));
+    float* input = (float*)xcalloc(inputs, sizeof(float));
     int i;
     for(i = 0; i < len; ++i){
         c = seed[i];
@@ -409,7 +409,7 @@ void valid_char_rnn(char *cfgfile, char *weightfile, char *seed)
     int words = 1;
     int c;
     int len = strlen(seed);
-    float* input = (float*)calloc(inputs, sizeof(float));
+    float* input = (float*)xcalloc(inputs, sizeof(float));
     int i;
     for(i = 0; i < len; ++i){
         c = seed[i];
@@ -448,7 +448,7 @@ void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
 
     int c;
     int seed_len = strlen(seed);
-    float* input = (float*)calloc(inputs, sizeof(float));
+    float* input = (float*)xcalloc(inputs, sizeof(float));
     int i;
     char *line;
     while((line=fgetl(stdin)) != 0){
diff --git a/src/rnn_layer.c b/src/rnn_layer.c
index 4b5b9c2c64c..328bcbf07d9 100644
--- a/src/rnn_layer.c
+++ b/src/rnn_layer.c
@@ -40,21 +40,21 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
     l.out_h = 1;
     l.out_c = outputs;
 
-    l.state = (float*)calloc(batch * hidden * (steps + 1), sizeof(float));
+    l.state = (float*)xcalloc(batch * hidden * (steps + 1), sizeof(float));
 
-    l.input_layer = (layer*)calloc(1, sizeof(layer));
+    l.input_layer = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.input_layer) = make_connected_layer(batch, steps, inputs, hidden, activation, batch_normalize);
     l.input_layer->batch = batch;
     if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
 
-    l.self_layer = (layer*)calloc(1, sizeof(layer));
+    l.self_layer = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.self_layer) = make_connected_layer(batch, steps, hidden, hidden, (log==2)?LOGGY:(log==1?LOGISTIC:activation), batch_normalize);
     l.self_layer->batch = batch;
     if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
 
-    l.output_layer = (layer*)calloc(1, sizeof(layer));
+    l.output_layer = (layer*)xcalloc(1, sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.output_layer) = make_connected_layer(batch, steps, hidden, outputs, activation, batch_normalize);
     l.output_layer->batch = batch;
diff --git a/src/rnn_vid.c b/src/rnn_vid.c
index a0fb0a3c790..a5ff5278435 100644
--- a/src/rnn_vid.c
+++ b/src/rnn_vid.c
@@ -20,10 +20,10 @@ float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int ste
     image out_im = get_network_image(net);
     int output_size = out_im.w*out_im.h*out_im.c;
     printf("%d %d %d\n", out_im.w, out_im.h, out_im.c);
-    float* feats = (float*)calloc(net.batch * batch * output_size, sizeof(float));
+    float* feats = (float*)xcalloc(net.batch * batch * output_size, sizeof(float));
     for(b = 0; b < batch; ++b){
         int input_size = net.w*net.h*net.c;
-        float* input = (float*)calloc(input_size * net.batch, sizeof(float));
+        float* input = (float*)xcalloc(input_size * net.batch, sizeof(float));
         char *filename = files[rand()%n];
         cap_cv *cap = get_capture_video_stream(filename);
         int frames = get_capture_frame_count_cv(cap);
diff --git a/src/route_layer.c b/src/route_layer.c
index b502fbe72b0..a9d35fc3582 100644
--- a/src/route_layer.c
+++ b/src/route_layer.c
@@ -21,8 +21,8 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz
     fprintf(stderr, "\n");
     l.outputs = outputs;
     l.inputs = outputs;
-    l.delta = (float*)calloc(outputs * batch, sizeof(float));
-    l.output = (float*)calloc(outputs * batch, sizeof(float));
+    l.delta = (float*)xcalloc(outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(outputs * batch, sizeof(float));
 
     l.forward = forward_route_layer;
     l.backward = backward_route_layer;
@@ -58,8 +58,8 @@ void resize_route_layer(route_layer *l, network *net)
         }
     }
     l->inputs = l->outputs;
-    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
-    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/sam_layer.c b/src/sam_layer.c
index da28e319775..fbb1aadb4c4 100644
--- a/src/sam_layer.c
+++ b/src/sam_layer.c
@@ -24,8 +24,8 @@ layer make_sam_layer(int batch, int index, int w, int h, int c, int w2, int h2,
     l.inputs = l.outputs;
     l.index = index;
 
-    l.delta = (float*)calloc(l.outputs * batch, sizeof(float));
-    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
+    l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(l.outputs * batch, sizeof(float));
 
     l.forward = forward_sam_layer;
     l.backward = backward_sam_layer;
@@ -45,8 +45,8 @@ void resize_sam_layer(layer *l, int w, int h)
     l->out_h = h;
     l->outputs = l->out_w*l->out_h*l->out_c;
     l->inputs = l->outputs;
-    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
-    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/scale_channels_layer.c b/src/scale_channels_layer.c
index 7322570116e..847eeda417c 100644
--- a/src/scale_channels_layer.c
+++ b/src/scale_channels_layer.c
@@ -24,8 +24,8 @@ layer make_scale_channels_layer(int batch, int index, int w, int h, int c, int w
     l.inputs = l.outputs;
     l.index = index;
 
-    l.delta = (float*)calloc(l.outputs * batch, sizeof(float));
-    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
+    l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(l.outputs * batch, sizeof(float));
 
     l.forward = forward_scale_channels_layer;
     l.backward = backward_scale_channels_layer;
@@ -45,8 +45,8 @@ void resize_scale_channels_layer(layer *l, int w, int h)
     l->out_h = h;
     l->outputs = l->out_w*l->out_h*l->out_c;
     l->inputs = l->outputs;
-    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
-    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c
index 1f7c6d35e90..fdc70aeda9e 100644
--- a/src/shortcut_layer.c
+++ b/src/shortcut_layer.c
@@ -23,8 +23,8 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
 
     l.index = index;
 
-    l.delta = (float*)calloc(l.outputs * batch, sizeof(float));
-    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
+    l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(l.outputs * batch, sizeof(float));
 
     l.forward = forward_shortcut_layer;
     l.backward = backward_shortcut_layer;
@@ -46,8 +46,8 @@ void resize_shortcut_layer(layer *l, int w, int h)
     l->h = l->out_h = h;
     l->outputs = w*h*l->out_c;
     l->inputs = l->outputs;
-    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
-    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 9bbff9a028b..4977c2d6442 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -36,10 +36,10 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
     l.groups = groups;
     l.inputs = inputs;
     l.outputs = inputs;
-    l.loss = (float*)calloc(inputs * batch, sizeof(float));
-    l.output = (float*)calloc(inputs * batch, sizeof(float));
-    l.delta = (float*)calloc(inputs * batch, sizeof(float));
-    l.cost = (float*)calloc(1, sizeof(float));
+    l.loss = (float*)xcalloc(inputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(inputs * batch, sizeof(float));
+    l.delta = (float*)xcalloc(inputs * batch, sizeof(float));
+    l.cost = (float*)xcalloc(1, sizeof(float));
 
     l.forward = forward_softmax_layer;
     l.backward = backward_softmax_layer;
diff --git a/src/tree.c b/src/tree.c
index 4383d693e13..8a2c23169b2 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -93,42 +93,42 @@ tree *read_tree(char *filename)
     int groups = 0;
     int n = 0;
     while((line=fgetl(fp)) != 0){
-        char* id = (char*)calloc(256, sizeof(char));
+        char* id = (char*)xcalloc(256, sizeof(char));
         int parent = -1;
         sscanf(line, "%s %d", id, &parent);
-        t.parent = (int*)realloc(t.parent, (n + 1) * sizeof(int));
+        t.parent = (int*)xrealloc(t.parent, (n + 1) * sizeof(int));
         t.parent[n] = parent;
 
-        t.name = (char**)realloc(t.name, (n + 1) * sizeof(char*));
+        t.name = (char**)xrealloc(t.name, (n + 1) * sizeof(char*));
         t.name[n] = id;
         if(parent != last_parent){
             ++groups;
-            t.group_offset = (int*)realloc(t.group_offset, groups * sizeof(int));
+            t.group_offset = (int*)xrealloc(t.group_offset, groups * sizeof(int));
             t.group_offset[groups - 1] = n - group_size;
-            t.group_size = (int*)realloc(t.group_size, groups * sizeof(int));
+            t.group_size = (int*)xrealloc(t.group_size, groups * sizeof(int));
             t.group_size[groups - 1] = group_size;
             group_size = 0;
             last_parent = parent;
         }
-        t.group = (int*)realloc(t.group, (n + 1) * sizeof(int));
+        t.group = (int*)xrealloc(t.group, (n + 1) * sizeof(int));
         t.group[n] = groups;
         ++n;
         ++group_size;
     }
     ++groups;
-    t.group_offset = (int*)realloc(t.group_offset, groups * sizeof(int));
+    t.group_offset = (int*)xrealloc(t.group_offset, groups * sizeof(int));
     t.group_offset[groups - 1] = n - group_size;
-    t.group_size = (int*)realloc(t.group_size, groups * sizeof(int));
+    t.group_size = (int*)xrealloc(t.group_size, groups * sizeof(int));
     t.group_size[groups - 1] = group_size;
     t.n = n;
     t.groups = groups;
-    t.leaf = (int*)calloc(n, sizeof(int));
+    t.leaf = (int*)xcalloc(n, sizeof(int));
     int i;
     for(i = 0; i < n; ++i) t.leaf[i] = 1;
     for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0;
 
     fclose(fp);
-    tree* tree_ptr = (tree*)calloc(1, sizeof(tree));
+    tree* tree_ptr = (tree*)xcalloc(1, sizeof(tree));
     *tree_ptr = t;
     //error(0);
     return tree_ptr;
diff --git a/src/upsample_layer.c b/src/upsample_layer.c
index 70d8de4d963..7e756782994 100644
--- a/src/upsample_layer.c
+++ b/src/upsample_layer.c
@@ -24,8 +24,8 @@ layer make_upsample_layer(int batch, int w, int h, int c, int stride)
     l.stride = stride;
     l.outputs = l.out_w*l.out_h*l.out_c;
     l.inputs = l.w*l.h*l.c;
-    l.delta = (float*)calloc(l.outputs * batch, sizeof(float));
-    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
+    l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(l.outputs * batch, sizeof(float));
 
     l.forward = forward_upsample_layer;
     l.backward = backward_upsample_layer;
@@ -53,8 +53,8 @@ void resize_upsample_layer(layer *l, int w, int h)
     }
     l->outputs = l->out_w*l->out_h*l->out_c;
     l->inputs = l->h*l->w*l->c;
-    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
-    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
+    l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/utils.c b/src/utils.c
index 2c3eb83f7c2..60c2f5a614c 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -56,7 +56,7 @@ int *read_map(char *filename)
     if(!file) file_error(filename);
     while((str=fgetl(file))){
         ++n;
-        map = (int*)realloc(map, n * sizeof(int));
+        map = (int*)xrealloc(map, n * sizeof(int));
         if(!map) {
           error("realloc failed");
         }
@@ -81,7 +81,7 @@ void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections)
 void shuffle(void *arr, size_t n, size_t size)
 {
     size_t i;
-    void* swp = (void*)calloc(1, size);
+    void* swp = (void*)xcalloc(1, size);
     for(i = 0; i < n-1; ++i){
         size_t j = i + random_gen()/(RAND_MAX / (n-i)+1);
         memcpy(swp,            (char*)arr+(j*size), size);
@@ -418,7 +418,7 @@ char *fgetl(FILE *fp)
     while((line[curr-1] != '\n') && !feof(fp)){
         if(curr == size-1){
             size *= 2;
-            line = (char*)realloc(line, size * sizeof(char));
+            line = (char*)xrealloc(line, size * sizeof(char));
             if(!line) {
                 printf("%ld\n", size);
                 malloc_error();
@@ -497,6 +497,9 @@ void write_all(int fd, char *buffer, size_t bytes)
 
 char *copy_string(char *s)
 {
+    if(!s) {
+        return NULL;
+    }
     char* copy = (char*)malloc(strlen(s) + 1);
     strncpy(copy, s, strlen(s)+1);
     return copy;
@@ -533,7 +536,7 @@ int count_fields(char *line)
 
 float *parse_fields(char *line, int n)
 {
-    float* field = (float*)calloc(n, sizeof(float));
+    float* field = (float*)xcalloc(n, sizeof(float));
     char *c, *p, *end;
     int count = 0;
     int done = 0;
@@ -721,8 +724,8 @@ int max_index(float *a, int n)
 int top_max_index(float *a, int n, int k)
 {
     if (n <= 0) return -1;
-    float *values = (float*)calloc(k, sizeof(float));
-    int *indexes = (int*)calloc(k, sizeof(int));
+    float *values = (float*)xcalloc(k, sizeof(float));
+    int *indexes = (int*)xcalloc(k, sizeof(int));
     int i, j;
     for (i = 0; i < n; ++i) {
         for (j = 0; j < k; ++j) {
@@ -835,9 +838,9 @@ float rand_scale(float s)
 float **one_hot_encode(float *a, int n, int k)
 {
     int i;
-    float** t = (float**)calloc(n, sizeof(float*));
+    float** t = (float**)xcalloc(n, sizeof(float*));
     for(i = 0; i < n; ++i){
-        t[i] = (float*)calloc(k, sizeof(float));
+        t[i] = (float*)xcalloc(k, sizeof(float));
         int index = (int)a[i];
         t[i][index] = 1;
     }
@@ -923,7 +926,7 @@ int check_array_is_inf(float *arr, int size)
 
 int *random_index_order(int min, int max)
 {
-    int *inds = (int *)calloc(max - min, sizeof(int));
+    int *inds = (int *)xcalloc(max - min, sizeof(int));
     int i;
     for (i = min; i < max; ++i) {
         inds[i - min] = i;
diff --git a/src/utils.h b/src/utils.h
index 70d729b7617..05b6da21600 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -14,6 +14,7 @@ LIB_API void free_ptrs(void **ptrs, int n);
 LIB_API void top_k(float *a, int n, int k, int *index);
 
 void *xcalloc(size_t nmemb, size_t size);
+void *xrealloc(void *ptr, size_t size);
 double what_time_is_it_now();
 int *read_map(char *filename);
 void shuffle(void *arr, size_t n, size_t size);
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index 1619d8afa3a..fe2e85ebeba 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -26,22 +26,22 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int
     l.out_h = l.h;
     l.out_c = l.c;
     l.classes = classes;
-    l.cost = (float*)calloc(1, sizeof(float));
-    l.biases = (float*)calloc(total * 2, sizeof(float));
+    l.cost = (float*)xcalloc(1, sizeof(float));
+    l.biases = (float*)xcalloc(total * 2, sizeof(float));
     if(mask) l.mask = mask;
     else{
-        l.mask = (int*)calloc(n, sizeof(int));
+        l.mask = (int*)xcalloc(n, sizeof(int));
         for(i = 0; i < n; ++i){
             l.mask[i] = i;
         }
     }
-    l.bias_updates = (float*)calloc(n * 2, sizeof(float));
+    l.bias_updates = (float*)xcalloc(n * 2, sizeof(float));
     l.outputs = h*w*n*(classes + 4 + 1);
     l.inputs = l.outputs;
     l.max_boxes = max_boxes;
     l.truths = l.max_boxes*(4 + 1);    // 90*(4 + 1);
-    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
-    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
+    l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float));
+    l.output = (float*)xcalloc(batch * l.outputs, sizeof(float));
     for(i = 0; i < total*2; ++i){
         l.biases[i] = .5;
     }
@@ -58,14 +58,14 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int
     if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1;
     else {
         cudaGetLastError(); // reset CUDA-error
-        l.output = (float*)calloc(batch * l.outputs, sizeof(float));
+        l.output = (float*)xcalloc(batch * l.outputs, sizeof(float));
     }
 
     free(l.delta);
     if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1;
     else {
         cudaGetLastError(); // reset CUDA-error
-        l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
+        l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float));
     }
 #endif
 
@@ -83,15 +83,15 @@ void resize_yolo_layer(layer *l, int w, int h)
     l->outputs = h*w*l->n*(l->classes + 4 + 1);
     l->inputs = l->outputs;
 
-    if (!l->output_pinned) l->output = (float*)realloc(l->output, l->batch*l->outputs * sizeof(float));
-    if (!l->delta_pinned) l->delta = (float*)realloc(l->delta, l->batch*l->outputs*sizeof(float));
+    if (!l->output_pinned) l->output = (float*)xrealloc(l->output, l->batch*l->outputs * sizeof(float));
+    if (!l->delta_pinned) l->delta = (float*)xrealloc(l->delta, l->batch*l->outputs*sizeof(float));
 
 #ifdef GPU
     if (l->output_pinned) {
         cudaFreeHost(l->output);
         if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) {
             cudaGetLastError(); // reset CUDA-error
-            l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
+            l->output = (float*)xrealloc(l->output, l->batch * l->outputs * sizeof(float));
             l->output_pinned = 0;
         }
     }
@@ -100,7 +100,7 @@ void resize_yolo_layer(layer *l, int w, int h)
         cudaFreeHost(l->delta);
         if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) {
             cudaGetLastError(); // reset CUDA-error
-            l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
+            l->delta = (float*)xrealloc(l->delta, l->batch * l->outputs * sizeof(float));
             l->delta_pinned = 0;
         }
     }
@@ -377,7 +377,7 @@ void forward_yolo_layer(const layer l, network_state state)
         // TODO: remove IOU loss fields before computing MSE on class
         //   probably split into two arrays
         int stride = l.w*l.h;
-        float* no_iou_loss_delta = (float *)calloc(l.batch * l.outputs, sizeof(float));
+        float* no_iou_loss_delta = (float *)xcalloc(l.batch * l.outputs, sizeof(float));
         memcpy(no_iou_loss_delta, l.delta, l.batch * l.outputs * sizeof(float));
         for (b = 0; b < l.batch; ++b) {
             for (j = 0; j < l.h; ++j) {
@@ -547,13 +547,13 @@ void forward_yolo_layer_gpu(const layer l, network_state state)
         return;
     }
 
-    float *in_cpu = (float *)calloc(l.batch*l.inputs, sizeof(float));
+    float *in_cpu = (float *)xcalloc(l.batch*l.inputs, sizeof(float));
     cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
     memcpy(in_cpu, l.output, l.batch*l.outputs*sizeof(float));
     float *truth_cpu = 0;
     if (state.truth) {
         int num_truth = l.batch*l.truths;
-        truth_cpu = (float *)calloc(num_truth, sizeof(float));
+        truth_cpu = (float *)xcalloc(num_truth, sizeof(float));
         cuda_pull_array(state.truth, truth_cpu, num_truth);
     }
     network_state cpu_state = state;