-
Notifications
You must be signed in to change notification settings - Fork 0
/
chapter2.html
1134 lines (1116 loc) · 181 KB
/
chapter2.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Another Book on Data Science - More on R/Python Programming</title>
<meta name="description" content="data science, R, Python, programming, machine learning">
<meta name="author" content="Nailong Zhang">
<!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
<!--[if lt IE 9]>
<script src="https://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<!-- Le styles -->
<link rel="stylesheet" href="bootstrap-1.1.0.min.css">
<link rel="stylesheet" href="style.css">
<link rel="stylesheet" href="small-screens.css">
<link rel="stylesheet" href="vs.css">
<link rel="stylesheet" href="code.css">
<link rel="stylesheet" href="application.css">
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-142297640-1', 'anotherbookondatascience.com');
ga('send', 'pageview');
</script>
</head>
<body>
<div class="topbar">
<div class="fill">
<div class="container-fluid fixed">
<h3><a href="index.html">More on R/Python Programming</a></h3>
<ul class="nav secondary-nav">
<li><a href="chapter1.html">«Previous</a></li>
<li><a href="chapter3.html">Next»</a></li>
</ul>
</div>
</div>
</div>
<div class="container-fluid" style="padding-top: 60px;">
<p>Sections in this Chapter:</p>
<ul>
<li><a href="#script">Write & run scripts</a></li>
<li><a href="#debug">Debugging</a></li>
<li><a href="#benchmark">Benchmarking</a></li>
<li><a href="#vectorization">Vectorization</a></li>
<li><a href="#parallelism">Embarrassingly parallelism in R/Python</a></li>
<li><a href="#evaluation">Evaluation strategy</a></li>
<li><a href="#cpp">Speed up with C/C++ in R/Python</a></li>
<li><a href="#fp">A first impression of functional programming</a></li>
<li><a href="#miscellaneous">Miscellaneous</a></li>
</ul>
<h2 id="script">Write & run scripts</h2>
<p>In the first Chapter, we are coding within the interactive mode of R/Python. When we are working on the real world projects, using an Integrated development environment (<span class="caps">IDE</span>) is a more pragmatic choice. There are not many choices for R <span class="caps">IDE</span>, and among them RStudio<sup class="footnote" id="fnr1"><a href="#fn1">1</a></sup> is the best one I have used so far. As for Python, I would recommend either Visual Studio Code<sup class="footnote" id="fnr2"><a href="#fn2">2</a></sup> or PyCharm<sup class="footnote" id="fnr3"><a href="#fn3">3</a></sup>. But of course, you could use any text editor to write R/Python scripts.</p>
<p>Let’s write a simple script to print <code>Hello World!</code> in R/Python. I have made a directory <code>chapter2</code> on my disk, the R script is saved as <code>hello_world.R</code> and the Python script is saved as <code>hello_world.py</code>, inside the directory.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<p>chapter2/hello_world.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="kp">print</span><span class="p">(</span><span class="s">"Hello World!"</span><span class="p">)</span></code></pre></figure></p>
</div>
<div class="coderight">
<language>Python</language>
<p>chapter2/hello_world.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="nb">print</span><span class="p">(</span><span class="s2">"Hello World!"</span><span class="p">)</span></code></pre></figure></p>
</div>
</div>
<p>There are a few ways to run the R script. For example, we can run the script from the console with the <code>r -f filename</code> command. Also, we can open the R interactive session and use the <code>source()</code> function. I would recommend the second approach with <code>source()</code> function. As for the Python script, we can run it from the console.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span>chapter2 <span class="o">$</span><span class="kp">ls</span>
<span class="lineno">2 </span>hello_world.R hello_world.py
<span class="lineno">3 </span>chapter2 <span class="o">$</span>r <span class="o">-</span>f hello_world.R
<span class="lineno">4 </span><span class="o">></span> <span class="kp">print</span><span class="p">(</span><span class="s">"Hello World!"</span><span class="p">)</span>
<span class="lineno">5 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="s">"Hello World!"</span>
<span class="lineno">6 </span>
<span class="lineno">7 </span>chapter2 <span class="o">$</span>r
<span class="lineno">8 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">'hello_world.R'</span><span class="p">)</span>
<span class="lineno">9 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="s">"Hello World!"</span></code></pre></figure></div>
<div class="coderight">
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="n">chapter2</span> <span class="err">$</span><span class="n">ls</span>
<span class="lineno">2 </span><span class="n">hello_world</span><span class="o">.</span><span class="n">R</span> <span class="n">hello_world</span><span class="o">.</span><span class="n">py</span>
<span class="lineno">3 </span>
<span class="lineno">4 </span><span class="n">chapter2</span> <span class="err">$</span><span class="n">python3</span><span class="o">.</span><span class="mi">7</span> <span class="n">hello_world</span><span class="o">.</span><span class="n">py</span>
<span class="lineno">5 </span><span class="n">Hello</span> <span class="n">World</span><span class="err">!</span></code></pre></figure></div>
</div>
<h2 id="debug">Debugging</h2>
<p>Debugging is one of the most important aspects of programming. What is debugging in programming? The programs we write might include errors/bugs and debugging is a step-by-step process to find and remove the errors/bugs in order to get the desired results.</p>
<p>If you are smart enough or the bugs are evident enough then you can debug the program on your mind without using a computer at all. But in general we need some tools/techniques to help us with debugging.</p>
<h3>Print</h3>
<p>Most of programming languages provide the functionality of printing, which is a natural way of debugging. By trying to place <code>print</code> statements at different positions we may finally catch the bugs. When I use <code>print</code> to debug, it’s feeling like playing the game of minesweeper. In Python, there is a module called <code>logging</code><sup class="footnote" id="fnr4"><a href="#fn4">4</a></sup> which could be used for debugging like the <code>print</code> function, but in a more elegant fashion.</p>
<h4>Browser in R and pdb in Python</h4>
<p>In R, there is a function <code>browser()</code> which interrupts the execution and allows the inspection of the current environment. Similarly, there is a module <code>pdb</code> in Python that provides more debugging features. We would only focus on the basic usages of <code>browser()</code> and the <code>set_trace()</code> function in <code>pdb</code> module.<br />
The essential difference between debugging using <code>print()</code> and <code>browser()</code> and <code>set_trace()</code> is that the latter functions allows us to debug in an interactive mode.</p>
<p>Let’s write a function which takes a sorted vector/list <code>v</code> and a target value <code>x</code> as input and returns the leftmost index <code>pos</code> of the sorted vector/list so that <code>v[pos]>=x</code>. Since <code>v</code> is already sorted, we may simply loop through it from left to right to find <code>pos</code>.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<p>chapter2/find_pos.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span>find_pos<span class="o">=</span><span class="kr">function</span><span class="p">(</span>v<span class="p">,</span>x<span class="p">){</span>
<span class="lineno"> 2 </span> <span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> <span class="m">1</span><span class="o">:</span><span class="kp">length</span><span class="p">(</span>v<span class="p">)){</span>
<span class="lineno"> 3 </span> <span class="kr">if</span> <span class="p">(</span>v<span class="p">[</span>i<span class="p">]</span><span class="o">>=</span>x<span class="p">){</span>
<span class="lineno"> 4 </span> <span class="kr">return</span><span class="p">(</span>i<span class="p">)</span>
<span class="lineno"> 5 </span> <span class="p">}</span>
<span class="lineno"> 6 </span> <span class="p">}</span>
<span class="lineno"> 7 </span><span class="p">}</span>
<span class="lineno"> 8 </span>
<span class="lineno"> 9 </span>v<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="m">1</span><span class="p">,</span><span class="m">2</span><span class="p">,</span><span class="m">5</span><span class="p">,</span><span class="m">10</span><span class="p">)</span>
<span class="lineno">10 </span><span class="kp">print</span><span class="p">(</span>find_pos<span class="p">(</span>v<span class="p">,</span><span class="m">-1</span><span class="p">))</span>
<span class="lineno">11 </span><span class="kp">print</span><span class="p">(</span>find_pos<span class="p">(</span>v<span class="p">,</span><span class="m">4</span><span class="p">))</span>
<span class="lineno">12 </span><span class="kp">print</span><span class="p">(</span>find_pos<span class="p">(</span>v<span class="p">,</span><span class="m">11</span><span class="p">))</span></code></pre></figure></p>
</div>
<div class="coderight">
<language>Python</language>
<p>chapter2/find_pos.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="k">def</span> <span class="nf">find_pos</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
<span class="lineno">2 </span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">v</span><span class="p">)):</span>
<span class="lineno">3 </span> <span class="k">if</span> <span class="n">v</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">>=</span><span class="n">x</span><span class="p">:</span>
<span class="lineno">4 </span> <span class="k">return</span> <span class="n">i</span>
<span class="lineno">5 </span>
<span class="lineno">6 </span><span class="n">v</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">10</span><span class="p">]</span>
<span class="lineno">7 </span><span class="nb">print</span><span class="p">(</span><span class="n">find_pos</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span>
<span class="lineno">8 </span><span class="nb">print</span><span class="p">(</span><span class="n">find_pos</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="mi">4</span><span class="p">))</span>
<span class="lineno">9 </span><span class="nb">print</span><span class="p">(</span><span class="n">find_pos</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="mi">11</span><span class="p">))</span></code></pre></figure></p>
</div>
</div>
<p>Now let’s run these two scripts.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span>chapter2 <span class="o">$</span>r
<span class="lineno">2 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">'find_pos.R'</span><span class="p">)</span>
<span class="lineno">3 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">1</span>
<span class="lineno">4 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">3</span>
<span class="lineno">5 </span><span class="kc">NULL</span></code></pre></figure></div>
<div class="coderight">
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="n">chapter2</span> <span class="err">$</span><span class="n">python3</span><span class="o">.</span><span class="mi">7</span> <span class="n">find_pos</span><span class="o">.</span><span class="n">py</span>
<span class="lineno">2 </span><span class="mi">0</span>
<span class="lineno">3 </span><span class="mi">2</span>
<span class="lineno">4 </span><span class="kc">None</span></code></pre></figure></div>
</div>
<p>When <code>x=11</code>, the function returns <code>NULL</code> in R and <code>None</code> in Python because there is no such element in <code>v</code> larger than <code>x</code>.<br />
The implementation above is trivial, but not efficient. If you have some background in data structures and algorithms, you probably know this question can be solved by binary search. The essential idea of binary search comes from divide-and-conquer<sup class="footnote" id="fnr5"><a href="#fn5">5</a></sup>. Since <code>v</code> is already sorted, we may divide it into two partitions by cutting it from the middle, and then we get the left partition and the right partition. <code>v</code> is sorted implies that both the left partition and the right partition are also sorted. If the target value <code>x</code> is larger than the rightmost element in the left partition, we can just discard the left partition and search <code>x</code> within the right partition. Otherwise, we can discard the right partition and search <code>x</code> within the left partition. Once we have determined which partition to search, we may apply the idea recursively so that in each step we reduce the size of <code>v</code> by half. If the length of <code>v</code> is denoted as <code>n</code>, in terms of big O notation<sup class="footnote" id="fnr6"><a href="#fn6">6</a></sup>, the run time complexity of binary search is $\mathcal{O}(\log{}n)$, compared with $\mathcal{O}(n)$ of the for-loop implementation.</p>
<p>The code below implements the binary search solution to our question (It is more intuitive to do it with recursion but here I write it with iteration since tail recursion optimization<sup class="footnote" id="fnr7"><a href="#fn7">7</a></sup> in R/Python is not supported).</p>
<language>R</language>
<p>chapter2/find_binary_search_buggy.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span>binary_search_buggy<span class="o">=</span><span class="kr">function</span><span class="p">(</span>v<span class="p">,</span>x<span class="p">){</span>
<span class="lineno"> 2 </span> start <span class="o">=</span> <span class="m">1</span>
<span class="lineno"> 3 </span> end <span class="o">=</span> <span class="kp">length</span><span class="p">(</span>v<span class="p">)</span>
<span class="lineno"> 4 </span> <span class="kr">while</span> <span class="p">(</span>start<span class="o"><</span>end<span class="p">){</span>
<span class="lineno"> 5 </span> mid <span class="o">=</span> <span class="p">(</span>start<span class="o">+</span>end<span class="p">)</span> <span class="o">%/%</span> <span class="m">2</span> <span class="c1"># %/% is the floor division operator</span>
<span class="lineno"> 6 </span> <span class="kr">if</span> <span class="p">(</span>v<span class="p">[</span>mid<span class="p">]</span><span class="o">>=</span>x<span class="p">){</span>
<span class="lineno"> 7 </span> end <span class="o">=</span> mid
<span class="lineno"> 8 </span> <span class="p">}</span><span class="kp">else</span><span class="p">{</span>
<span class="lineno"> 9 </span> start <span class="o">=</span> mid<span class="m">+1</span>
<span class="lineno">10 </span> <span class="p">}</span>
<span class="lineno">11 </span> <span class="p">}</span>
<span class="lineno">12 </span> <span class="kr">return</span><span class="p">(</span>start<span class="p">)</span>
<span class="lineno">13 </span><span class="p">}</span>
<span class="lineno">14 </span>v<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="m">1</span><span class="p">,</span><span class="m">2</span><span class="p">,</span><span class="m">5</span><span class="p">,</span><span class="m">10</span><span class="p">)</span>
<span class="lineno">15 </span><span class="kp">print</span><span class="p">(</span>binary_search_buggy<span class="p">(</span>v<span class="p">,</span><span class="m">-1</span><span class="p">))</span>
<span class="lineno">16 </span><span class="kp">print</span><span class="p">(</span>binary_search_buggy<span class="p">(</span>v<span class="p">,</span><span class="m">5</span><span class="p">))</span>
<span class="lineno">17 </span><span class="kp">print</span><span class="p">(</span>binary_search_buggy<span class="p">(</span>v<span class="p">,</span><span class="m">11</span><span class="p">))</span></code></pre></figure></p>
<language>Python</language>
<p>chapter2/find_binary_search_buggy.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="k">def</span> <span class="nf">binary_search_buggy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
<span class="lineno"> 2 </span> <span class="n">start</span><span class="p">,</span><span class="n">end</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="nb">len</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span>
<span class="lineno"> 3 </span> <span class="k">while</span> <span class="n">start</span><span class="o"><</span><span class="n">end</span><span class="p">:</span>
<span class="lineno"> 4 </span> <span class="n">mid</span> <span class="o">=</span> <span class="p">(</span><span class="n">start</span><span class="o">+</span><span class="n">end</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span> <span class="c1"># // is the floor division operator</span>
<span class="lineno"> 5 </span> <span class="k">if</span> <span class="n">v</span><span class="p">[</span><span class="n">mid</span><span class="p">]</span><span class="o">>=</span><span class="n">x</span><span class="p">:</span>
<span class="lineno"> 6 </span> <span class="n">end</span> <span class="o">=</span> <span class="n">mid</span>
<span class="lineno"> 7 </span> <span class="k">else</span><span class="p">:</span>
<span class="lineno"> 8 </span> <span class="n">start</span> <span class="o">=</span> <span class="n">mid</span><span class="o">+</span><span class="mi">1</span>
<span class="lineno"> 9 </span> <span class="k">return</span> <span class="n">start</span>
<span class="lineno">10 </span>
<span class="lineno">11 </span><span class="n">v</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">10</span><span class="p">]</span>
<span class="lineno">12 </span><span class="nb">print</span><span class="p">(</span><span class="n">binary_search_buggy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span>
<span class="lineno">13 </span><span class="nb">print</span><span class="p">(</span><span class="n">binary_search_buggy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="mi">5</span><span class="p">))</span>
<span class="lineno">14 </span><span class="nb">print</span><span class="p">(</span><span class="n">binary_search_buggy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="mi">11</span><span class="p">))</span></code></pre></figure></p>
<p>Now let’s run these two binary search scripts.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span>chapter2 <span class="o">$</span>r
<span class="lineno">2 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span> <span class="s">'binary_search_buggy.R'</span><span class="p">)</span>
<span class="lineno">3 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">1</span>
<span class="lineno">4 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">3</span>
<span class="lineno">5 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">4</span></code></pre></figure></div>
<div class="coderight">
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="n">chapter2</span> <span class="err">$</span><span class="n">python3</span><span class="o">.</span><span class="mi">7</span> <span class="n">binary_search_buggy</span><span class="o">.</span><span class="n">py</span>
<span class="lineno">2 </span><span class="mi">0</span>
<span class="lineno">3 </span><span class="mi">2</span>
<span class="lineno">4 </span><span class="mi">3</span></code></pre></figure></div>
</div>
<p>The binary search solutions don’t work as expected when <code>x=11</code>. We write two new scripts.</p>
<language>R</language>
<p>chapter2/find_binary_search_buggy_debug.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span>binary_search_buggy<span class="o">=</span><span class="kr">function</span><span class="p">(</span>v<span class="p">,</span>x<span class="p">){</span>
<span class="lineno"> 2 </span> <span class="kp">browser</span><span class="p">()</span>
<span class="lineno"> 3 </span> start <span class="o">=</span> <span class="m">1</span>
<span class="lineno"> 4 </span> end <span class="o">=</span> <span class="kp">length</span><span class="p">(</span>v<span class="p">)</span>
<span class="lineno"> 5 </span> <span class="kr">while</span> <span class="p">(</span>start<span class="o"><</span>end<span class="p">){</span>
<span class="lineno"> 6 </span> mid <span class="o">=</span> <span class="p">(</span>start<span class="o">+</span>end<span class="p">)</span>
<span class="lineno"> 7 </span> <span class="kr">if</span> <span class="p">(</span>v<span class="p">[</span>mid<span class="p">]</span><span class="o">>=</span>x<span class="p">){</span>
<span class="lineno"> 8 </span> end <span class="o">=</span> mid
<span class="lineno"> 9 </span> <span class="p">}</span><span class="kp">else</span><span class="p">{</span>
<span class="lineno">10 </span> start <span class="o">=</span> mid<span class="m">+1</span>
<span class="lineno">11 </span> <span class="p">}</span>
<span class="lineno">12 </span> <span class="p">}</span>
<span class="lineno">13 </span> <span class="kr">return</span><span class="p">(</span>start<span class="p">)</span>
<span class="lineno">14 </span><span class="p">}</span>
<span class="lineno">15 </span>v<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="m">1</span><span class="p">,</span><span class="m">2</span><span class="p">,</span><span class="m">5</span><span class="p">,</span><span class="m">10</span><span class="p">)</span>
<span class="lineno">16 </span><span class="kp">print</span><span class="p">(</span>binary_search_buggy<span class="p">(</span>v<span class="p">,</span><span class="m">11</span><span class="p">))</span></code></pre></figure></p>
<language>Python</language>
<p>chapter2/find_binary_search_buggy_debug.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="kn">from</span> <span class="nn">pdb</span> <span class="k">import</span> <span class="n">set_trace</span>
<span class="lineno"> 2 </span><span class="k">def</span> <span class="nf">binary_search_buggy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
<span class="lineno"> 3 </span> <span class="n">set_trace</span><span class="p">()</span>
<span class="lineno"> 4 </span> <span class="n">start</span><span class="p">,</span><span class="n">end</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="nb">len</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span>
<span class="lineno"> 5 </span> <span class="k">while</span> <span class="n">start</span><span class="o"><</span><span class="n">end</span><span class="p">:</span>
<span class="lineno"> 6 </span> <span class="n">mid</span> <span class="o">=</span> <span class="p">(</span><span class="n">start</span><span class="o">+</span><span class="n">end</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span>
<span class="lineno"> 7 </span> <span class="k">if</span> <span class="n">v</span><span class="p">[</span><span class="n">mid</span><span class="p">]</span><span class="o">>=</span><span class="n">x</span><span class="p">:</span>
<span class="lineno"> 8 </span> <span class="n">end</span> <span class="o">=</span> <span class="n">mid</span>
<span class="lineno"> 9 </span> <span class="k">else</span><span class="p">:</span>
<span class="lineno">10 </span> <span class="n">start</span> <span class="o">=</span> <span class="n">mid</span><span class="o">+</span><span class="mi">1</span>
<span class="lineno">11 </span> <span class="k">return</span> <span class="n">start</span>
<span class="lineno">12 </span>
<span class="lineno">13 </span><span class="n">v</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">10</span><span class="p">]</span>
<span class="lineno">14 </span><span class="nb">print</span><span class="p">(</span><span class="n">binary_search_buggy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="mi">11</span><span class="p">))</span></code></pre></figure></p>
<p>Let’s try to debug the programs with the help of <code>browser()</code> and <code>set_trace()</code>.</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">'binary_search_buggy_debug.R'</span><span class="p">)</span>
<span class="lineno"> 2 </span>Called from<span class="o">:</span> binary_search_buggy<span class="p">(</span>v<span class="p">,</span> <span class="m">11</span><span class="p">)</span>
<span class="lineno"> 3 </span>Browse<span class="p">[</span><span class="m">1</span><span class="p">]</span><span class="o">></span> <span class="kp">ls</span><span class="p">()</span>
<span class="lineno"> 4 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="s">"v"</span> <span class="s">"x"</span>
<span class="lineno"> 5 </span>Browse<span class="p">[</span><span class="m">1</span><span class="p">]</span><span class="o">></span> n
<span class="lineno"> 6 </span>debug at binary_search_buggy_debug.R<span class="c1">#3: start = 1</span>
<span class="lineno"> 7 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno"> 8 </span>debug at binary_search_buggy_debug.R<span class="c1">#4: end = length(v)</span>
<span class="lineno"> 9 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">10 </span>debug at binary_search_buggy_debug.R<span class="c1">#5: while (start < end) {</span>
<span class="lineno">11 </span> mid <span class="o">=</span> <span class="p">(</span>start <span class="o">+</span> end<span class="p">)</span><span class="o">%/%</span><span class="m">2</span>
<span class="lineno">12 </span> <span class="kr">if</span> <span class="p">(</span>v<span class="p">[</span>mid<span class="p">]</span> <span class="o">>=</span> x<span class="p">)</span> <span class="p">{</span>
<span class="lineno">13 </span> end <span class="o">=</span> mid
<span class="lineno">14 </span> <span class="p">}</span>
<span class="lineno">15 </span> <span class="kr">else</span> <span class="p">{</span>
<span class="lineno">16 </span> start <span class="o">=</span> mid <span class="o">+</span> <span class="m">1</span>
<span class="lineno">17 </span> <span class="p">}</span>
<span class="lineno">18 </span><span class="p">}</span>
<span class="lineno">19 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">20 </span>debug at binary_search_buggy_debug.R<span class="c1">#6: mid = (start + end)%/%2</span>
<span class="lineno">21 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">22 </span>debug at binary_search_buggy_debug.R<span class="c1">#7: if (v[mid] >= x) {</span>
<span class="lineno">23 </span> end <span class="o">=</span> mid
<span class="lineno">24 </span><span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
<span class="lineno">25 </span> start <span class="o">=</span> mid <span class="o">+</span> <span class="m">1</span>
<span class="lineno">26 </span><span class="p">}</span>
<span class="lineno">27 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">28 </span>debug at binary_search_buggy_debug.R<span class="c1">#10: start = mid + 1</span>
<span class="lineno">29 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">30 </span>debug at binary_search_buggy_debug.R<span class="c1">#5: (while) start < end</span>
<span class="lineno">31 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">32 </span>debug at binary_search_buggy_debug.R<span class="c1">#6: mid = (start + end)%/%2</span>
<span class="lineno">33 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">34 </span>debug at binary_search_buggy_debug.R<span class="c1">#7: if (v[mid] >= x) {</span>
<span class="lineno">35 </span> end <span class="o">=</span> mid
<span class="lineno">36 </span><span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
<span class="lineno">37 </span> start <span class="o">=</span> mid <span class="o">+</span> <span class="m">1</span>
<span class="lineno">38 </span><span class="p">}</span>
<span class="lineno">39 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">40 </span>debug at binary_search_buggy_debug.R<span class="c1">#10: start = mid + 1</span>
<span class="lineno">41 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">42 </span>debug at binary_search_buggy_debug.R<span class="c1">#5: (while) start < end</span>
<span class="lineno">43 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> start
<span class="lineno">44 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">4</span>
<span class="lineno">45 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">46 </span>debug at binary_search_buggy_debug.R<span class="c1">#13: return(start)</span>
<span class="lineno">47 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> n
<span class="lineno">48 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">4</span></code></pre></figure><p>In the R code snippet above, we placed the <code>browser()</code> function on the top of the function <code>binary_search_buggy</code>. Then when we call the function we enter into the debugging environment. By calling <code>ls()</code> we see all variables in the current debugging scope, i.e., <code>v,x</code>. Typing <code>n</code> will evaluate the next statement. After typing <code>n</code> a few times, we finally exit from the while loop because <code>start = 4</code> such that <code>start < end</code> is <span class="caps">FALSE</span>. As a result, the function just returns the value of start, i.e., 4. To exit from the debugging environment, we can type <code>Q</code>; to continue the execution we can type <code>c</code>.</p>
<p>The root cause is that we didn’t deal with the corner case when the target value <code>x</code> is larger than the last/largest element in <code>v</code> correctly.</p>
<p>Let’s debug the Python function using <code>pdb</code> module.</p>
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="n">chapter2</span> <span class="err">$</span><span class="n">python3</span><span class="o">.</span><span class="mi">7</span> <span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span>
<span class="lineno"> 2 </span><span class="o">></span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span> <span class="n">binary_search_buggy</span><span class="p">()</span>
<span class="lineno"> 3 </span><span class="o">-></span> <span class="n">start</span><span class="p">,</span><span class="n">end</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="nb">len</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span>
<span class="lineno"> 4 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">n</span>
<span class="lineno"> 5 </span><span class="o">></span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="n">binary_search_buggy</span><span class="p">()</span>
<span class="lineno"> 6 </span><span class="o">-></span> <span class="k">while</span> <span class="n">start</span><span class="o"><</span><span class="n">end</span><span class="p">:</span>
<span class="lineno"> 7 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">l</span>
<span class="lineno"> 8 </span> <span class="mi">1</span> <span class="kn">from</span> <span class="nn">pdb</span> <span class="k">import</span> <span class="n">set_trace</span>
<span class="lineno"> 9 </span> <span class="mi">2</span> <span class="k">def</span> <span class="nf">binary_search_buggy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
<span class="lineno">10 </span> <span class="mi">3</span> <span class="n">set_trace</span><span class="p">()</span>
<span class="lineno">11 </span> <span class="mi">4</span> <span class="n">start</span><span class="p">,</span><span class="n">end</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="nb">len</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span>
<span class="lineno">12 </span> <span class="mi">5</span> <span class="o">-></span> <span class="k">while</span> <span class="n">start</span><span class="o"><</span><span class="n">end</span><span class="p">:</span>
<span class="lineno">13 </span> <span class="mi">6</span> <span class="n">mid</span> <span class="o">=</span> <span class="p">(</span><span class="n">start</span><span class="o">+</span><span class="n">end</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span>
<span class="lineno">14 </span> <span class="mi">7</span> <span class="k">if</span> <span class="n">v</span><span class="p">[</span><span class="n">mid</span><span class="p">]</span><span class="o">>=</span><span class="n">x</span><span class="p">:</span>
<span class="lineno">15 </span> <span class="mi">8</span> <span class="n">end</span> <span class="o">=</span> <span class="n">mid</span>
<span class="lineno">16 </span> <span class="mi">9</span> <span class="k">else</span><span class="p">:</span>
<span class="lineno">17 </span> <span class="mi">10</span> <span class="n">start</span> <span class="o">=</span> <span class="n">mid</span><span class="o">+</span><span class="mi">1</span>
<span class="lineno">18 </span> <span class="mi">11</span> <span class="k">return</span> <span class="n">start</span>
<span class="lineno">19 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">b</span> <span class="mi">7</span>
<span class="lineno">20 </span><span class="n">Breakpoint</span> <span class="mi">1</span> <span class="n">at</span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">7</span>
<span class="lineno">21 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">c</span>
<span class="lineno">22 </span><span class="o">></span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">(</span><span class="mi">7</span><span class="p">)</span> <span class="n">binary_search_buggy</span><span class="p">()</span>
<span class="lineno">23 </span><span class="o">-></span> <span class="k">if</span> <span class="n">v</span><span class="p">[</span><span class="n">mid</span><span class="p">]</span><span class="o">>=</span><span class="n">x</span><span class="p">:</span>
<span class="lineno">24 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">c</span>
<span class="lineno">25 </span><span class="o">></span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">(</span><span class="mi">7</span><span class="p">)</span> <span class="n">binary_search_buggy</span><span class="p">()</span>
<span class="lineno">26 </span><span class="o">-></span> <span class="k">if</span> <span class="n">v</span><span class="p">[</span><span class="n">mid</span><span class="p">]</span><span class="o">>=</span><span class="n">x</span><span class="p">:</span>
<span class="lineno">27 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">mid</span>
<span class="lineno">28 </span><span class="mi">2</span>
<span class="lineno">29 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">n</span>
<span class="lineno">30 </span><span class="o">></span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span> <span class="n">binary_search_buggy</span><span class="p">()</span>
<span class="lineno">31 </span><span class="o">-></span> <span class="n">start</span> <span class="o">=</span> <span class="n">mid</span><span class="o">+</span><span class="mi">1</span>
<span class="lineno">32 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">n</span>
<span class="lineno">33 </span><span class="o">></span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="n">binary_search_buggy</span><span class="p">()</span>
<span class="lineno">34 </span><span class="o">-></span> <span class="k">while</span> <span class="n">start</span><span class="o"><</span><span class="n">end</span><span class="p">:</span>
<span class="lineno">35 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">start</span>
<span class="lineno">36 </span><span class="mi">3</span>
<span class="lineno">37 </span><span class="p">(</span><span class="n">Pdb</span><span class="p">)</span> <span class="n">n</span>
<span class="lineno">38 </span><span class="o">></span> <span class="n">chapter2</span><span class="o">/</span><span class="n">binary_search_buggy_debug</span><span class="o">.</span><span class="n">py</span><span class="p">(</span><span class="mi">11</span><span class="p">)</span> <span class="n">binary_search_buggy</span><span class="p">()</span>
<span class="lineno">39 </span><span class="o">-></span> <span class="k">return</span> <span class="n">start</span></code></pre></figure><p>Similar to R, command <code>n</code> would evaluate the next statement in <code>pdb</code>. Typing command <code>l</code> would show the current line of current execution. Command <code>b line_number</code> would set the corresponding line as a break point; and <code>c</code> would continue the execution until the next breakpoint (if exists).</p>
<p>In R, besides the <code>browser()</code> function there are a pair of functions <code>debug() and undebug()</code> which are also very handy when we try to debug a function; especially when the function is wrapped in a package. More specifically, the <code>debug</code> function would invoke the debugging environment whenever we call the function to debug. See the example below how we invoke the debugging environment for the <code>sd</code> function (standard deviation calculation).</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="o">></span> x<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="m">1</span><span class="p">,</span><span class="m">1</span><span class="p">,</span><span class="m">2</span><span class="p">)</span>
<span class="lineno"> 2 </span><span class="o">></span> <span class="kp">debug</span><span class="p">(</span>sd<span class="p">)</span>
<span class="lineno"> 3 </span><span class="o">></span> sd<span class="p">(</span>x<span class="p">)</span>
<span class="lineno"> 4 </span>debugging <span class="kp">in</span><span class="o">:</span> sd<span class="p">(</span>x<span class="p">)</span>
<span class="lineno"> 5 </span><span class="kp">debug</span><span class="o">:</span> <span class="kp">sqrt</span><span class="p">(</span>var<span class="p">(</span><span class="kr">if</span> <span class="p">(</span><span class="kp">is.vector</span><span class="p">(</span>x<span class="p">)</span> <span class="o">||</span> <span class="kp">is.factor</span><span class="p">(</span>x<span class="p">))</span> x <span class="kr">else</span> <span class="kp">as.double</span><span class="p">(</span>x<span class="p">),</span>
<span class="lineno"> 6 </span> na.rm <span class="o">=</span> na.rm<span class="p">))</span>
<span class="lineno"> 7 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> <span class="kp">ls</span><span class="p">()</span>
<span class="lineno"> 8 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="s">"na.rm"</span> <span class="s">"x"</span>
<span class="lineno"> 9 </span>Browse<span class="p">[</span><span class="m">2</span><span class="p">]</span><span class="o">></span> Q
<span class="lineno">10 </span><span class="o">></span> <span class="kp">undebug</span><span class="p">(</span>sd<span class="p">)</span>
<span class="lineno">11 </span><span class="o">></span> sd<span class="p">(</span>x<span class="p">)</span>
<span class="lineno">12 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">0.5773503</span></code></pre></figure><p>The binary_search solutions are fixed below.</p>
<language>R</language>
<p>chapter2/find_binary_search.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span>binary_search<span class="o">=</span><span class="kr">function</span><span class="p">(</span>v<span class="p">,</span>x<span class="p">){</span>
<span class="lineno"> 2 </span> <span class="kr">if</span> <span class="p">(</span>x<span class="o">></span>v<span class="p">[</span><span class="kp">length</span><span class="p">(</span>v<span class="p">)]){</span><span class="kr">return</span><span class="p">(</span><span class="kc">NULL</span><span class="p">)}</span>
<span class="lineno"> 3 </span> start <span class="o">=</span> <span class="m">1</span>
<span class="lineno"> 4 </span> end <span class="o">=</span> <span class="kp">length</span><span class="p">(</span>v<span class="p">)</span>
<span class="lineno"> 5 </span> <span class="kr">while</span> <span class="p">(</span>start<span class="o"><</span>end<span class="p">){</span>
<span class="lineno"> 6 </span> mid <span class="o">=</span> <span class="p">(</span>start<span class="o">+</span>end<span class="p">)</span>
<span class="lineno"> 7 </span> <span class="kr">if</span> <span class="p">(</span>v<span class="p">[</span>mid<span class="p">]</span><span class="o">>=</span>x<span class="p">){</span>
<span class="lineno"> 8 </span> end <span class="o">=</span> mid
<span class="lineno"> 9 </span> <span class="p">}</span><span class="kp">else</span><span class="p">{</span>
<span class="lineno">10 </span> start <span class="o">=</span> mid<span class="m">+1</span>
<span class="lineno">11 </span> <span class="p">}</span>
<span class="lineno">12 </span> <span class="p">}</span>
<span class="lineno">13 </span> <span class="kr">return</span><span class="p">(</span>start<span class="p">)</span>
<span class="lineno">14 </span><span class="p">}</span></code></pre></figure></p>
<language>Python</language>
<p>chapter2/find_binary_search.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="k">def</span> <span class="nf">binary_search</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
<span class="lineno"> 2 </span> <span class="k">if</span> <span class="n">x</span><span class="o">></span><span class="n">v</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span> <span class="k">return</span>
<span class="lineno"> 3 </span> <span class="n">start</span><span class="p">,</span><span class="n">end</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="nb">len</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span>
<span class="lineno"> 4 </span> <span class="k">while</span> <span class="n">start</span><span class="o"><</span><span class="n">end</span><span class="p">:</span>
<span class="lineno"> 5 </span> <span class="n">mid</span> <span class="o">=</span> <span class="p">(</span><span class="n">start</span><span class="o">+</span><span class="n">end</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span>
<span class="lineno"> 6 </span> <span class="k">if</span> <span class="n">v</span><span class="p">[</span><span class="n">mid</span><span class="p">]</span><span class="o">>=</span><span class="n">x</span><span class="p">:</span>
<span class="lineno"> 7 </span> <span class="n">end</span> <span class="o">=</span> <span class="n">mid</span>
<span class="lineno"> 8 </span> <span class="k">else</span><span class="p">:</span>
<span class="lineno"> 9 </span> <span class="n">start</span> <span class="o">=</span> <span class="n">mid</span><span class="o">+</span><span class="mi">1</span>
<span class="lineno">10 </span> <span class="k">return</span> <span class="n">start</span></code></pre></figure></p>
<h2 id="benchmark">Benchmarking</h2>
<p>By benchmarking, I mean measuring the entire operation time of a piece of program. There is another term called profiling which is related to benchmarking. But profiling is more complex since it commonly aims at understanding the behavior of the program and optimizing the program in terms of time elapsed during the operation.</p>
<p>In R, I like using the <code>microbenchmark</code> package. And in Python, <code>timeit</code> module is a good tool to use when we want to benchmark a small bits of Python code.</p>
<p>As mentioned before, the run time complexity of binary search is better than that of a for-loop search. We can do benchmarking to compare the two algorithms.</p>
<language>R</language>
<p>chapter2/benchmark.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="kn">library</span><span class="p">(</span>microbenchmark<span class="p">)</span>
<span class="lineno"> 2 </span><span class="kn">source</span><span class="p">(</span><span class="s">'binary_search.R'</span><span class="p">)</span>
<span class="lineno"> 3 </span><span class="kn">source</span><span class="p">(</span><span class="s">'find_pos.R'</span><span class="p">)</span>
<span class="lineno"> 4 </span>
<span class="lineno"> 5 </span>v<span class="o">=</span><span class="m">1</span><span class="o">:</span><span class="m">10000</span>
<span class="lineno"> 6 </span>
<span class="lineno"> 7 </span><span class="c1"># call each function 1000 times;</span>
<span class="lineno"> 8 </span><span class="c1"># each time we randomly select an integer as the target value</span>
<span class="lineno"> 9 </span>
<span class="lineno">10 </span><span class="c1"># for-loop solution</span>
<span class="lineno">11 </span><span class="kp">set.seed</span><span class="p">(</span><span class="m">2019</span><span class="p">)</span>
<span class="lineno">12 </span><span class="kp">print</span><span class="p">(</span>microbenchmark<span class="p">(</span>find_pos<span class="p">(</span>v<span class="p">,</span><span class="kp">sample</span><span class="p">(</span><span class="m">10000</span><span class="p">,</span><span class="m">1</span><span class="p">)),</span> times<span class="o">=</span><span class="m">1000</span><span class="p">))</span>
<span class="lineno">13 </span><span class="c1"># binary-search solution</span>
<span class="lineno">14 </span><span class="kp">set.seed</span><span class="p">(</span><span class="m">2019</span><span class="p">)</span>
<span class="lineno">15 </span><span class="kp">print</span><span class="p">(</span>microbenchmark<span class="p">(</span>binary_search<span class="p">(</span>v<span class="p">,</span> <span class="kp">sample</span><span class="p">(</span><span class="m">10000</span><span class="p">,</span><span class="m">1</span><span class="p">)),</span> times<span class="o">=</span><span class="m">1000</span><span class="p">))</span></code></pre></figure></p>
<p>In the R code above, <code>times=1000</code> means we want to call the function <code>1000</code> times in the benchmarking process. The <code>sample()</code> function is used to draw samples from a set of elements. Specifically, we pass the argument <code>1</code> to <code>sample()</code> to draw a single element. It’s the first time we use <code>set.seed()</code> function in this book. In R/Python, we draw random numbers based on the pseudorandom number generator (<span class="caps">PRNG</span>) algorithm<sup class="footnote" id="fnr8"><a href="#fn8">8</a></sup>. The sequence of numbers generated by <span class="caps">PRNG</span> is completed determined by an initial value, i.e., the seed. Whenever a program involves the usage of <span class="caps">PRNG</span>, it is better to set the seed in order to get replicable results (see the example below).</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="o">></span> <span class="kp">set.seed</span><span class="p">(</span><span class="m">2019</span><span class="p">)</span>
<span class="lineno">2 </span><span class="o">></span> rnorm<span class="p">(</span><span class="m">1</span><span class="p">)</span>
<span class="lineno">3 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">0.7385227</span>
<span class="lineno">4 </span><span class="o">></span> rnorm<span class="p">(</span><span class="m">1</span><span class="p">)</span>
<span class="lineno">5 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">-0.5147605</span></code></pre></figure></div>
<div class="coderight">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="o">></span> <span class="kp">set.seed</span><span class="p">(</span><span class="m">2019</span><span class="p">)</span>
<span class="lineno">2 </span><span class="o">></span> rnorm<span class="p">(</span><span class="m">1</span><span class="p">)</span>
<span class="lineno">3 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">0.7385227</span>
<span class="lineno">4 </span><span class="o">></span> <span class="kp">set.seed</span><span class="p">(</span><span class="m">2019</span><span class="p">)</span>
<span class="lineno">5 </span><span class="o">></span> rnorm<span class="p">(</span><span class="m">1</span><span class="p">)</span>
<span class="lineno">6 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">0.7385227</span></code></pre></figure></div>
</div>
<p>Now let’s run the R script to see the benchmarking result.</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">'benchmark.R'</span><span class="p">)</span>
<span class="lineno">2 </span>Unit<span class="o">:</span> microseconds
<span class="lineno">3 </span> expr min lq mean median uq max neval
<span class="lineno">4 </span> find_pos<span class="p">(</span>v<span class="p">,</span> <span class="kp">sample</span><span class="p">(</span><span class="m">10000</span><span class="p">,</span> <span class="m">1</span><span class="p">))</span> <span class="m">3.96</span> <span class="m">109.5385</span> <span class="m">207.6627</span> <span class="m">207.5565</span> <span class="m">307.8875</span> <span class="m">536.171</span> <span class="m">1000</span>
<span class="lineno">5 </span>
<span class="lineno">6 </span>Unit<span class="o">:</span> microseconds
<span class="lineno">7 </span> expr min lq mean median uq max neval
<span class="lineno">8 </span> binary_search<span class="p">(</span>v<span class="p">,</span> <span class="kp">sample</span><span class="p">(</span><span class="m">10000</span><span class="p">,</span> <span class="m">1</span><span class="p">))</span> <span class="m">5.898</span> <span class="m">6.3325</span> <span class="m">14.2159</span> <span class="m">6.6115</span> <span class="m">7.3635</span> <span class="m">6435.57</span> <span class="m">1000</span></code></pre></figure><p>The binary_search solution is much more efficient based on the benchmarking result.<br />
Doing the same benchmarking in Python is a bit of complicated.</p>
<language>Python</language>
<p>chapter2/benchmark.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="kn">from</span> <span class="nn">binary_search</span> <span class="k">import</span> <span class="n">binary_search</span>
<span class="lineno"> 2 </span><span class="kn">from</span> <span class="nn">find_pos</span> <span class="k">import</span> <span class="n">find_pos</span>
<span class="lineno"> 3 </span><span class="kn">import</span> <span class="nn">timeit</span>
<span class="lineno"> 4 </span><span class="kn">import</span> <span class="nn">random</span>
<span class="lineno"> 5 </span>
<span class="lineno"> 6 </span><span class="n">v</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">10001</span><span class="p">))</span>
<span class="lineno"> 7 </span>
<span class="lineno"> 8 </span><span class="k">def</span> <span class="nf">test_for_loop</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno"> 9 </span> <span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">2019</span><span class="p">)</span>
<span class="lineno">10 </span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">11 </span> <span class="n">find_pos</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">10000</span><span class="p">))</span>
<span class="lineno">12 </span>
<span class="lineno">13 </span><span class="k">def</span> <span class="nf">test_bs</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">14 </span> <span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">2019</span><span class="p">)</span>
<span class="lineno">15 </span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">16 </span> <span class="n">binary_search</span><span class="p">(</span><span class="n">v</span><span class="p">,</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">10000</span><span class="p">))</span>
<span class="lineno">17 </span>
<span class="lineno">18 </span><span class="c1"># for-loop solution</span>
<span class="lineno">19 </span><span class="nb">print</span><span class="p">(</span><span class="n">timeit</span><span class="o">.</span><span class="n">timeit</span><span class="p">(</span><span class="s1">'test_for_loop(1000)'</span><span class="p">,</span> <span class="n">setup</span><span class="o">=</span><span class="s1">'from __main__ import test_for_loop'</span><span class="p">,</span><span class="n">number</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span>
<span class="lineno">20 </span><span class="c1"># binary_search solution</span>
<span class="lineno">21 </span><span class="nb">print</span><span class="p">(</span><span class="n">timeit</span><span class="o">.</span><span class="n">timeit</span><span class="p">(</span><span class="s1">'test_bs(1000)'</span><span class="p">,</span> <span class="n">setup</span><span class="o">=</span><span class="s1">'from __main__ import test_bs'</span><span class="p">,</span><span class="n">number</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span></code></pre></figure></p>
<p>The most interesting part of the Python code above is <code>from __main__ import</code>. Let’s ignore it for now, and we would revisit it later.</p>
<p>Below is the benchmarking result in Python (the unit is second).</p>
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="n">chapter2</span> <span class="err">$</span><span class="n">python3</span> <span class="n">benchmark</span><span class="o">.</span><span class="n">py</span>
<span class="lineno">2 </span><span class="mf">0.284618441</span>
<span class="lineno">3 </span><span class="mf">0.00396658900000002</span></code></pre></figure><h2 id="vectorization">Vectorization</h2>
<p>In parallel computing, automatic vectorization<sup class="footnote" id="fnr9"><a href="#fn9">9</a></sup> means a program in a scalar implementation is converted to a vector implementation which process multiple pairs of operands simultaneously by compilers that feature auto-vectorization. For example, let’s calculate the element-wise sum of two arrays <code>x</code> and <code>y</code> of the same length in C programming language.</p>
<language>C</language>
<figure class="highlight"><pre><code class="language-c" data-lang="c"><span></span><span class="lineno">1 </span><span class="kt">int</span> <span class="n">x</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">};</span>
<span class="lineno">2 </span><span class="kt">int</span> <span class="n">y</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">};</span>
<span class="lineno">3 </span><span class="kt">int</span> <span class="n">z</span><span class="p">[</span><span class="mi">4</span><span class="p">];</span>
<span class="lineno">4 </span><span class="k">for</span> <span class="p">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="p">;</span><span class="n">i</span><span class="o"><</span><span class="mi">4</span><span class="p">;</span><span class="n">i</span><span class="o">++</span><span class="p">){</span>
<span class="lineno">5 </span> <span class="n">z</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">=</span><span class="n">x</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">+</span><span class="n">y</span><span class="p">[</span><span class="n">i</span><span class="p">];</span>
<span class="lineno">6 </span><span class="p">}</span></code></pre></figure><p>The C code above might be vectorized by the compiler so that the actual number of iterations performed could be less than 4. If 4 pairs of operands are processed at once, there would be only 1 iteration. Automatic vectorization may make the program runs much faster in some languages like C. However, when we talk about vectorization in R/Python, it is different from automatic vectorization. Vectorization in R/Python usually refers to the human effort paid to avoid for-loops. First, let’s see some examples of how for-loops may slow your programs in R/Python.</p>
<language>R</language>
<p>chapter2/vectorization_1.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="kn">library</span><span class="p">(</span>microbenchmark<span class="p">)</span>
<span class="lineno"> 2 </span>
<span class="lineno"> 3 </span><span class="c1"># generate n standard normal r.v</span>
<span class="lineno"> 4 </span>rnorm_loop <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">){</span>
<span class="lineno"> 5 </span>x<span class="o">=</span><span class="kp">rep</span><span class="p">(</span><span class="m">0</span><span class="p">,</span>n<span class="p">)</span>
<span class="lineno"> 6 </span><span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> <span class="m">1</span><span class="o">:</span>n<span class="p">)</span> <span class="p">{</span>x<span class="p">[</span>i<span class="p">]</span><span class="o">=</span>rnorm<span class="p">(</span><span class="m">1</span><span class="p">)}</span>
<span class="lineno"> 7 </span><span class="p">}</span>
<span class="lineno"> 8 </span>
<span class="lineno"> 9 </span>rnorm_vec <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">){</span>
<span class="lineno">10 </span>x<span class="o">=</span>rnorm<span class="p">(</span>n<span class="p">)</span>
<span class="lineno">11 </span><span class="p">}</span>
<span class="lineno">12 </span>
<span class="lineno">13 </span>n<span class="o">=</span><span class="m">100</span>
<span class="lineno">14 </span><span class="c1"># for loop</span>
<span class="lineno">15 </span><span class="kp">print</span><span class="p">(</span>microbenchmark<span class="p">(</span>rnorm_loop<span class="p">(</span>n<span class="p">),</span>times<span class="o">=</span><span class="m">1000</span><span class="p">))</span>
<span class="lineno">16 </span><span class="c1"># vectorize</span>
<span class="lineno">17 </span><span class="kp">print</span><span class="p">(</span>microbenchmark<span class="p">(</span>rnorm_vec<span class="p">(</span>n<span class="p">),</span>times<span class="o">=</span><span class="m">1000</span><span class="p">))</span></code></pre></figure></p>
<p>Running the R code results in the following result on my local machine.</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">'vectorization_1.R'</span><span class="p">)</span>
<span class="lineno">2 </span>Unit<span class="o">:</span> microseconds
<span class="lineno">3 </span> expr min lq mean median uq max neval
<span class="lineno">4 </span> rnorm_loop<span class="p">(</span>n<span class="p">)</span> <span class="m">131.622</span> <span class="m">142.699</span> <span class="m">248.7603</span> <span class="m">145.3995</span> <span class="m">270.212</span> <span class="m">16355.6</span> <span class="m">1000</span>
<span class="lineno">5 </span>Unit<span class="o">:</span> microseconds
<span class="lineno">6 </span> expr min lq mean median uq max neval
<span class="lineno">7 </span> rnorm_vec<span class="p">(</span>n<span class="p">)</span> <span class="m">6.696</span> <span class="m">7.128</span> <span class="m">10.87463</span> <span class="m">7.515</span> <span class="m">8.291</span> <span class="m">2422.338</span> <span class="m">1000</span></code></pre></figure><language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="kn">import</span> <span class="nn">timeit</span>
<span class="lineno"> 2 </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="lineno"> 3 </span>
<span class="lineno"> 4 </span><span class="k">def</span> <span class="nf">rnorm_for_loop</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno"> 5 </span> <span class="n">x</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">*</span><span class="n">n</span> <span class="c1"># create a list with n 0s</span>
<span class="lineno"> 6 </span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">2019</span><span class="p">)</span>
<span class="lineno"> 7 </span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno"> 8 </span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span>
<span class="lineno"> 9 </span>
<span class="lineno">10 </span><span class="k">def</span> <span class="nf">rnorm_vec</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">11 </span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">2019</span><span class="p">)</span>
<span class="lineno">12 </span> <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="n">n</span><span class="p">)</span>
<span class="lineno">13 </span>
<span class="lineno">14 </span><span class="nb">print</span><span class="p">(</span><span class="s2">"for loop"</span><span class="p">)</span>
<span class="lineno">15 </span><span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'{timeit.timeit("rnorm_for_loop(100)", setup="from __main__ import rnorm_for_loop",number=1000):.6f} seconds'</span><span class="p">)</span>
<span class="lineno">16 </span><span class="nb">print</span><span class="p">(</span><span class="s2">"vectorized"</span><span class="p">)</span>
<span class="lineno">17 </span><span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'{timeit.timeit("rnorm_vec(100)", setup="from __main__ import rnorm_vec",number=1000):.6f} seconds'</span><span class="p">)</span></code></pre></figure><p>Please note that in this Python example we are using the <code>random</code> submodule of <code>numpy</code> module instead of the built-in <code>random</code> module since <code>random</code> module doesn’t provide the vectorized version of random number generation function. Running the Python code results in the following result on my local machine.</p>
<p>Also, the <code>timeit.timeit</code> measures the total time to run the main statements <code>number</code> times, but not the average time per run.</p>
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="n">chapter2</span> <span class="err">$</span><span class="n">python3</span><span class="o">.</span><span class="mi">7</span> <span class="n">vectorization_1</span><span class="o">.</span><span class="n">py</span>
<span class="lineno">2 </span><span class="k">for</span> <span class="n">loop</span>
<span class="lineno">3 </span><span class="mf">0.258466</span> <span class="n">seconds</span>
<span class="lineno">4 </span><span class="n">vectorized</span>
<span class="lineno">5 </span><span class="mf">0.008213</span> <span class="n">seconds</span></code></pre></figure><p>In either R or Python, the vectorized version of random normal random variable (r.v.) is significantly faster than the scalar version. It is worth noting the usage of the <code>print(f'')</code> statement in the Python code, which is different from the way how we print the object of <code>Complex</code> class in \autoref{ch:chp1</code>. In the code above, we use the <code>f-string</code><sup class="footnote" id="fnr10"><a href="#fn10">10</a></sup> which is a literal string prefixed with <code>'f'</code> containing expressions inside <code>\{\</code></code> which would be replaced with their values. <code>f-string</code> was a feature introduced since Python 3.6. If you are familiar with Scala, you may find that this feature is quite similar with the string interpolation mechanism introduced since Scala 2.10.</p>
<p>It’s also worth noting that lots of built-in functions in R are already vectorized, such as the basic arithmetic operators, comparison operators, <code>ifelse()</code>, element-wise logical operators <code>&,|</code>. But the logical operators <code>&&, ||</code> are not vectorized.</p>
<p>In addition to vectorization, there are also some built-in functions which may help to avoid the usages of for-loops. For example, in R we might be able use the <code>apply</code> family of functions to replace for-loops; and in Python the <code>map()</code> function can also be useful. In the Python <code>pandas</code> module, there are also many usages of <code>map/apply</code> methods. But in general the usage of <code>apply/map</code> functions has little or nothing to do with performance improvement. However, appropriate usages of such functions may help with the readability of the program. Compared with the <code>apply</code> family of functions in R, I think the <code>do.call()</code> function is more useful in practice. We would spend some time in <code>do.call()</code> later.</p>
<appname>Application – Biham–Middleton–Levine (<span class="caps">BML</span>) traffic model</appname>
<blockquote class="appquote">
<p>
Considering the importance of vectorization in scientific programming, let’s try to get more familiar with vectorization thorough the Biham–Middleton–Levine (<span class="caps">BML</span>) traffic model<sup class="footnote" id="fnr11"><a href="#fn11">11</a></sup>. The <span class="caps">BML</span> model is very important in modern studies of traffic flow since it exhibits a sharp phase transition from free flowing status to a fully jammed status. A simplified <span class="caps">BML</span> model could be characterized as follows:
</p>
<ul>
<li>Initialized on a 2-D lattice, each site of which is either empty or occupied by a colored particle (blue or red);</li>
<li>Particles are distributed randomly through the initialization according to a uniform distribution; the two colors of particles are equally distributed.</li>
<li>On even time steps, all blue particles attempt to move one site up and an attempt fails if the site to occupy is not empty;</li>
<li>On Odd time steps, all red particles attempt to move one site right and an attempt fails if the site to occupy is not empty;</li>
<li>The lattice is assumed periodic which means when a particle moves out of the lattice, it would move into the lattice from the opposite side.<br />
</blockquote></li>
</ul>
<p>The <span class="caps">BML</span> model specified above is implemented in both R/Python as follows to illustrate the usage of vectorization.</p>
<language>R</language>
<p>chapter2/<span class="caps">BML</span>.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="kn">library</span><span class="p">(</span>R6<span class="p">)</span>
<span class="lineno"> 2 </span>BML <span class="o">=</span> R6Class<span class="p">(</span>
<span class="lineno"> 3 </span> <span class="s">"BML"</span><span class="p">,</span>
<span class="lineno"> 4 </span> public <span class="o">=</span> <span class="kt">list</span><span class="p">(</span>
<span class="lineno"> 5 </span> <span class="c1"># alpha is the parameter of the uniform distribution to control particle distribution's density</span>
<span class="lineno"> 6 </span> <span class="c1"># m*n is the dimension of the lattice</span>
<span class="lineno"> 7 </span> alpha <span class="o">=</span> <span class="kc">NULL</span><span class="p">,</span>
<span class="lineno"> 8 </span> m <span class="o">=</span> <span class="kc">NULL</span><span class="p">,</span>
<span class="lineno"> 9 </span> n <span class="o">=</span> <span class="kc">NULL</span><span class="p">,</span>
<span class="lineno">10 </span> lattice <span class="o">=</span> <span class="kc">NULL</span><span class="p">,</span>
<span class="lineno">11 </span> initialize <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>alpha<span class="p">,</span> m<span class="p">,</span> n<span class="p">)</span> <span class="p">{</span>
<span class="lineno">12 </span> self<span class="o">$</span>alpha <span class="o">=</span> alpha
<span class="lineno">13 </span> self<span class="o">$</span>m <span class="o">=</span> m
<span class="lineno">14 </span> self<span class="o">$</span>n <span class="o">=</span> n
<span class="lineno">15 </span> self<span class="o">$</span>initialize_lattice<span class="p">()</span>
<span class="lineno">16 </span> <span class="p">},</span>
<span class="lineno">17 </span> initialize_lattice <span class="o">=</span> <span class="kr">function</span><span class="p">()</span> <span class="p">{</span>
<span class="lineno">18 </span> <span class="c1"># 0 -> empty site</span>
<span class="lineno">19 </span> <span class="c1"># 1 -> blue particle</span>
<span class="lineno">20 </span> <span class="c1"># 2 -> red particle</span>
<span class="lineno">21 </span> u <span class="o">=</span> runif<span class="p">(</span>self<span class="o">$</span>m <span class="o">*</span> self<span class="o">$</span>n<span class="p">)</span>
<span class="lineno">22 </span> <span class="c1"># the usage of L is to make sure the elements in particles are of type integer;</span>
<span class="lineno">23 </span> <span class="c1"># otherwise they would be created as double</span>
<span class="lineno">24 </span> particles <span class="o">=</span> <span class="kp">rep</span><span class="p">(</span><span class="m">0L</span><span class="p">,</span> self<span class="o">$</span>m <span class="o">*</span> self<span class="o">$</span>n<span class="p">)</span>
<span class="lineno">25 </span> <span class="c1"># doing inverse transform sampling</span>
<span class="lineno">26 </span> particles<span class="p">[(</span>u <span class="o">></span> self<span class="o">$</span>alpha<span class="p">)</span> <span class="o">&</span>
<span class="lineno">27 </span> <span class="p">(</span>u <span class="o"><=</span> <span class="p">(</span>self<span class="o">$</span>alpha <span class="o">+</span> <span class="m">1.0</span><span class="p">)</span> <span class="o">/</span> <span class="m">2</span><span class="p">)]</span> <span class="o">=</span> <span class="m">1L</span>
<span class="lineno">28 </span> particles<span class="p">[</span>u <span class="o">></span> <span class="p">(</span>self<span class="o">$</span>alpha <span class="o">+</span> <span class="m">1.0</span><span class="p">)</span> <span class="o">/</span> <span class="m">2</span><span class="p">]</span> <span class="o">=</span> <span class="m">2L</span>
<span class="lineno">29 </span> self<span class="o">$</span>lattice <span class="o">=</span> <span class="kt">array</span><span class="p">(</span>particles<span class="p">,</span> <span class="kt">c</span><span class="p">(</span>self<span class="o">$</span>m<span class="p">,</span> self<span class="o">$</span>n<span class="p">))</span>
<span class="lineno">30 </span> <span class="p">},</span>
<span class="lineno">31 </span> odd_step <span class="o">=</span> <span class="kr">function</span><span class="p">()</span> <span class="p">{</span>
<span class="lineno">32 </span> blue.index <span class="o">=</span> <span class="kp">which</span><span class="p">(</span>self<span class="o">$</span>lattice <span class="o">==</span> <span class="m">1L</span><span class="p">,</span> arr.ind <span class="o">=</span> <span class="kc">TRUE</span><span class="p">)</span>
<span class="lineno">33 </span> <span class="c1"># make a copy of the index</span>
<span class="lineno">34 </span> blue.up.index <span class="o">=</span> blue.index
<span class="lineno">35 </span> <span class="c1"># blue particles move 1 site up</span>
<span class="lineno">36 </span> blue.up.index<span class="p">[,</span> <span class="m">1</span><span class="p">]</span> <span class="o">=</span> blue.index<span class="p">[,</span> <span class="m">1</span><span class="p">]</span> <span class="o">-</span> <span class="m">1L</span>
<span class="lineno">37 </span> <span class="c1"># periodic boundary condition</span>
<span class="lineno">38 </span> blue.up.index<span class="p">[</span>blue.up.index<span class="p">[,</span> <span class="m">1</span><span class="p">]</span> <span class="o">==</span> <span class="m">0L</span><span class="p">,</span> <span class="m">1</span><span class="p">]</span> <span class="o">=</span> self<span class="o">$</span>m
<span class="lineno">39 </span> <span class="c1"># find which moves are feasible</span>
<span class="lineno">40 </span> blue.movable <span class="o">=</span> self<span class="o">$</span>lattice<span class="p">[</span>blue.up.index<span class="p">]</span> <span class="o">==</span> <span class="m">0L</span>
<span class="lineno">41 </span> <span class="c1"># move blue particles one site up</span>
<span class="lineno">42 </span> <span class="c1"># drop=FALSE prevents the 2D array degenerates to 1D array</span>
<span class="lineno">43 </span> self<span class="o">$</span>lattice<span class="p">[</span>blue.up.index<span class="p">[</span>blue.movable<span class="p">,</span> <span class="p">,</span> drop <span class="o">=</span> <span class="kc">FALSE</span><span class="p">]]</span> <span class="o">=</span> <span class="m">1L</span>
<span class="lineno">44 </span> self<span class="o">$</span>lattice<span class="p">[</span>blue.index<span class="p">[</span>blue.movable<span class="p">,</span> <span class="p">,</span> drop <span class="o">=</span> <span class="kc">FALSE</span><span class="p">]]</span> <span class="o">=</span> <span class="m">0L</span>
<span class="lineno">45 </span> <span class="p">},</span>
<span class="lineno">46 </span> even_step <span class="o">=</span> <span class="kr">function</span><span class="p">()</span> <span class="p">{</span>
<span class="lineno">47 </span> red.index <span class="o">=</span> <span class="kp">which</span><span class="p">(</span>self<span class="o">$</span>lattice <span class="o">==</span> <span class="m">2L</span><span class="p">,</span> arr.ind <span class="o">=</span> <span class="kc">TRUE</span><span class="p">)</span>
<span class="lineno">48 </span> <span class="c1"># make a copy of the index</span>
<span class="lineno">49 </span> red.right.index <span class="o">=</span> red.index
<span class="lineno">50 </span> <span class="c1"># red particles move 1 site right</span>
<span class="lineno">51 </span> red.right.index<span class="p">[,</span> <span class="m">2</span><span class="p">]</span> <span class="o">=</span> red.index<span class="p">[,</span> <span class="m">2</span><span class="p">]</span> <span class="o">+</span> <span class="m">1L</span>
<span class="lineno">52 </span> <span class="c1"># periodic boundary condition</span>
<span class="lineno">53 </span> red.right.index<span class="p">[</span>red.right.index<span class="p">[,</span> <span class="m">2</span><span class="p">]</span> <span class="o">==</span> <span class="p">(</span>self<span class="o">$</span>n <span class="o">+</span> <span class="m">1L</span><span class="p">),</span> <span class="m">2</span><span class="p">]</span> <span class="o">=</span> <span class="m">1</span>
<span class="lineno">54 </span> <span class="c1"># find which moves are feasible</span>
<span class="lineno">55 </span> red.movable <span class="o">=</span> self<span class="o">$</span>lattice<span class="p">[</span>red.right.index<span class="p">]</span> <span class="o">==</span> <span class="m">0L</span>
<span class="lineno">56 </span> <span class="c1"># move red particles one site right</span>
<span class="lineno">57 </span> self<span class="o">$</span>lattice<span class="p">[</span>red.right.index<span class="p">[</span>red.movable<span class="p">,</span> <span class="p">,</span> drop <span class="o">=</span> <span class="kc">FALSE</span><span class="p">]]</span> <span class="o">=</span> <span class="m">2L</span>
<span class="lineno">58 </span> self<span class="o">$</span>lattice<span class="p">[</span>red.index<span class="p">[</span>red.movable<span class="p">,</span> <span class="p">,</span> drop <span class="o">=</span> <span class="kc">FALSE</span><span class="p">]]</span> <span class="o">=</span> <span class="m">0L</span>
<span class="lineno">59 </span> <span class="p">}</span>
<span class="lineno">60 </span> <span class="p">)</span>
<span class="lineno">61 </span><span class="p">)</span></code></pre></figure></p>
<p>Now we can create a simple <span class="caps">BML</span> system on a $5\times5$ lattice using the R code above.</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">'BML.R'</span><span class="p">)</span>
<span class="lineno"> 2 </span><span class="o">></span> <span class="kp">set.seed</span><span class="p">(</span><span class="m">2019</span><span class="p">)</span>
<span class="lineno"> 3 </span><span class="o">></span> bml<span class="o">=</span>BML<span class="o">$</span>new<span class="p">(</span><span class="m">0.4</span><span class="p">,</span><span class="m">5</span><span class="p">,</span><span class="m">5</span><span class="p">)</span>
<span class="lineno"> 4 </span><span class="o">></span> bml<span class="o">$</span>lattice
<span class="lineno"> 5 </span> <span class="p">[,</span><span class="m">1</span><span class="p">]</span> <span class="p">[,</span><span class="m">2</span><span class="p">]</span> <span class="p">[,</span><span class="m">3</span><span class="p">]</span> <span class="p">[,</span><span class="m">4</span><span class="p">]</span> <span class="p">[,</span><span class="m">5</span><span class="p">]</span>
<span class="lineno"> 6 </span><span class="p">[</span><span class="m">1</span><span class="p">,]</span> <span class="m">2</span> <span class="m">0</span> <span class="m">2</span> <span class="m">1</span> <span class="m">1</span>
<span class="lineno"> 7 </span><span class="p">[</span><span class="m">2</span><span class="p">,]</span> <span class="m">2</span> <span class="m">2</span> <span class="m">1</span> <span class="m">0</span> <span class="m">1</span>
<span class="lineno"> 8 </span><span class="p">[</span><span class="m">3</span><span class="p">,]</span> <span class="m">0</span> <span class="m">0</span> <span class="m">0</span> <span class="m">2</span> <span class="m">2</span>
<span class="lineno"> 9 </span><span class="p">[</span><span class="m">4</span><span class="p">,]</span> <span class="m">1</span> <span class="m">0</span> <span class="m">0</span> <span class="m">0</span> <span class="m">0</span>
<span class="lineno">10 </span><span class="p">[</span><span class="m">5</span><span class="p">,]</span> <span class="m">0</span> <span class="m">1</span> <span class="m">1</span> <span class="m">1</span> <span class="m">0</span>
<span class="lineno">11 </span><span class="o">></span> bml<span class="o">$</span>odd_step<span class="p">()</span>
<span class="lineno">12 </span><span class="o">></span> bml<span class="o">$</span>lattice
<span class="lineno">13 </span> <span class="p">[,</span><span class="m">1</span><span class="p">]</span> <span class="p">[,</span><span class="m">2</span><span class="p">]</span> <span class="p">[,</span><span class="m">3</span><span class="p">]</span> <span class="p">[,</span><span class="m">4</span><span class="p">]</span> <span class="p">[,</span><span class="m">5</span><span class="p">]</span>
<span class="lineno">14 </span><span class="p">[</span><span class="m">1</span><span class="p">,]</span> <span class="m">2</span> <span class="m">0</span> <span class="m">2</span> <span class="m">1</span> <span class="m">0</span>
<span class="lineno">15 </span><span class="p">[</span><span class="m">2</span><span class="p">,]</span> <span class="m">2</span> <span class="m">2</span> <span class="m">1</span> <span class="m">0</span> <span class="m">1</span>
<span class="lineno">16 </span><span class="p">[</span><span class="m">3</span><span class="p">,]</span> <span class="m">1</span> <span class="m">0</span> <span class="m">0</span> <span class="m">2</span> <span class="m">2</span>
<span class="lineno">17 </span><span class="p">[</span><span class="m">4</span><span class="p">,]</span> <span class="m">0</span> <span class="m">1</span> <span class="m">1</span> <span class="m">1</span> <span class="m">0</span>
<span class="lineno">18 </span><span class="p">[</span><span class="m">5</span><span class="p">,]</span> <span class="m">0</span> <span class="m">0</span> <span class="m">0</span> <span class="m">0</span> <span class="m">1</span>
<span class="lineno">19 </span><span class="o">></span> bml<span class="o">$</span>even_step<span class="p">()</span>
<span class="lineno">20 </span><span class="o">></span> bml<span class="o">$</span>lattice
<span class="lineno">21 </span> <span class="p">[,</span><span class="m">1</span><span class="p">]</span> <span class="p">[,</span><span class="m">2</span><span class="p">]</span> <span class="p">[,</span><span class="m">3</span><span class="p">]</span> <span class="p">[,</span><span class="m">4</span><span class="p">]</span> <span class="p">[,</span><span class="m">5</span><span class="p">]</span>
<span class="lineno">22 </span><span class="p">[</span><span class="m">1</span><span class="p">,]</span> <span class="m">0</span> <span class="m">2</span> <span class="m">2</span> <span class="m">1</span> <span class="m">0</span>
<span class="lineno">23 </span><span class="p">[</span><span class="m">2</span><span class="p">,]</span> <span class="m">2</span> <span class="m">2</span> <span class="m">1</span> <span class="m">0</span> <span class="m">1</span>
<span class="lineno">24 </span><span class="p">[</span><span class="m">3</span><span class="p">,]</span> <span class="m">1</span> <span class="m">0</span> <span class="m">0</span> <span class="m">2</span> <span class="m">2</span>
<span class="lineno">25 </span><span class="p">[</span><span class="m">4</span><span class="p">,]</span> <span class="m">0</span> <span class="m">1</span> <span class="m">1</span> <span class="m">1</span> <span class="m">0</span>
<span class="lineno">26 </span><span class="p">[</span><span class="m">5</span><span class="p">,]</span> <span class="m">0</span> <span class="m">0</span> <span class="m">0</span> <span class="m">0</span> <span class="m">1</span></code></pre></figure><p>In the initialization step, we used the inverse transform sampling approach<sup class="footnote" id="fnr12"><a href="#fn12">12</a></sup> to generate the status of each site. Inverse transform sampling method is basic but powerful approach to generate r.v. from any probability distribution given its cumulative distribution function (<span class="caps">CDF</span>). Reading the wiki page is enough to master this sampling method.</p>
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="lineno"> 2 </span>
<span class="lineno"> 3 </span><span class="k">class</span> <span class="nc">BML</span><span class="p">:</span>
<span class="lineno"> 4 </span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">alpha</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">n</span><span class="p">):</span>
<span class="lineno"> 5 </span> <span class="bp">self</span><span class="o">.</span><span class="n">alpha</span> <span class="o">=</span> <span class="n">alpha</span>
<span class="lineno"> 6 </span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
<span class="lineno"> 7 </span> <span class="bp">self</span><span class="o">.</span><span class="n">initialize_lattice</span><span class="p">()</span>
<span class="lineno"> 8 </span>
<span class="lineno"> 9 </span> <span class="k">def</span> <span class="nf">initialize_lattice</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="lineno">10 </span> <span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
<span class="lineno">11 </span> <span class="c1"># instead of using default list, we use np.array to create the lattice</span>
<span class="lineno">12 </span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros_like</span><span class="p">(</span><span class="n">u</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
<span class="lineno">13 </span> <span class="c1"># the parentheses below can't be ignored</span>
<span class="lineno">14 </span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[(</span><span class="n">u</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">alpha</span><span class="p">)</span> <span class="o">&</span> <span class="p">(</span><span class="n">u</span> <span class="o"><=</span> <span class="p">(</span><span class="mf">1.0</span><span class="o">+</span><span class="bp">self</span><span class="o">.</span><span class="n">alpha</span><span class="p">)</span><span class="o">/</span><span class="mf">2.0</span><span class="p">)]</span> <span class="o">=</span> <span class="mi">1</span>
<span class="lineno">15 </span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[</span><span class="n">u</span> <span class="o">></span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">alpha</span><span class="o">+</span><span class="mf">1.0</span><span class="p">)</span><span class="o">/</span><span class="mf">2.0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">2</span>
<span class="lineno">16 </span>
<span class="lineno">17 </span> <span class="k">def</span> <span class="nf">odd_step</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="lineno">18 </span> <span class="c1"># please note that np.where returns a tuple which is immutable</span>
<span class="lineno">19 </span> <span class="n">blue_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lattice</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span>
<span class="lineno">20 </span> <span class="n">blue_index_i</span> <span class="o">=</span> <span class="n">blue_index</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span>
<span class="lineno">21 </span> <span class="n">blue_index_i</span><span class="p">[</span><span class="n">blue_index_i</span> <span class="o"><</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span>
<span class="lineno">22 </span> <span class="n">blue_movable</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[(</span><span class="n">blue_index_i</span><span class="p">,</span> <span class="n">blue_index</span><span class="p">[</span><span class="mi">1</span><span class="p">])]</span> <span class="o">==</span> <span class="mi">0</span>
<span class="lineno">23 </span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[(</span><span class="n">blue_index_i</span><span class="p">[</span><span class="n">blue_movable</span><span class="p">],</span>
<span class="lineno">24 </span> <span class="n">blue_index</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="n">blue_movable</span><span class="p">])]</span> <span class="o">=</span> <span class="mi">1</span>
<span class="lineno">25 </span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[(</span><span class="n">blue_index</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="n">blue_movable</span><span class="p">],</span>
<span class="lineno">26 </span> <span class="n">blue_index</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="n">blue_movable</span><span class="p">])]</span> <span class="o">=</span> <span class="mi">0</span>
<span class="lineno">27 </span>
<span class="lineno">28 </span> <span class="k">def</span> <span class="nf">even_step</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="lineno">29 </span> <span class="n">red_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lattice</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span>
<span class="lineno">30 </span> <span class="n">red_index_j</span> <span class="o">=</span> <span class="n">red_index</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span>
<span class="lineno">31 </span> <span class="n">red_index_j</span><span class="p">[</span><span class="n">red_index_j</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]]</span> <span class="o">=</span> <span class="mi">0</span>
<span class="lineno">32 </span> <span class="n">red_movable</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[(</span><span class="n">red_index</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">red_index_j</span><span class="p">)]</span> <span class="o">==</span> <span class="mi">0</span>
<span class="lineno">33 </span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[(</span><span class="n">red_index</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="n">red_movable</span><span class="p">],</span>
<span class="lineno">34 </span> <span class="n">red_index_j</span><span class="p">[</span><span class="n">red_movable</span><span class="p">])]</span> <span class="o">=</span> <span class="mi">2</span>
<span class="lineno">35 </span> <span class="bp">self</span><span class="o">.</span><span class="n">lattice</span><span class="p">[(</span><span class="n">red_index</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="n">red_movable</span><span class="p">],</span>
<span class="lineno">36 </span> <span class="n">red_index</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="n">red_movable</span><span class="p">])]</span> <span class="o">=</span> <span class="mi">0</span></code></pre></figure><p>The Python implementation is also given.</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="o">>>></span> import numpy as np
<span class="lineno"> 2 </span><span class="o">>>></span> np.random.seed<span class="p">(</span><span class="m">2019</span><span class="p">)</span>
<span class="lineno"> 3 </span><span class="o">>>></span> from BML import BML
<span class="lineno"> 4 </span><span class="o">>>></span> bml<span class="o">=</span>BML<span class="p">(</span><span class="m">0.4</span><span class="p">,</span><span class="m">5</span><span class="p">,</span><span class="m">5</span><span class="p">)</span>
<span class="lineno"> 5 </span><span class="o">>>></span> bml.lattice
<span class="lineno"> 6 </span><span class="kt">array</span><span class="p">([[</span><span class="m">2</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">],</span>
<span class="lineno"> 7 </span> <span class="p">[</span><span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">],</span>
<span class="lineno"> 8 </span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">0</span><span class="p">],</span>
<span class="lineno"> 9 </span> <span class="p">[</span><span class="m">2</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">],</span>
<span class="lineno">10 </span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">]])</span>
<span class="lineno">11 </span><span class="o">>>></span> bml.odd_step<span class="p">()</span>
<span class="lineno">12 </span><span class="o">>>></span> bml.lattice
<span class="lineno">13 </span><span class="kt">array</span><span class="p">([[</span><span class="m">2</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">],</span>
<span class="lineno">14 </span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">],</span>
<span class="lineno">15 </span> <span class="p">[</span><span class="m">0</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">0</span><span class="p">],</span>
<span class="lineno">16 </span> <span class="p">[</span><span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">],</span>
<span class="lineno">17 </span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">]])</span>
<span class="lineno">18 </span><span class="o">>>></span> bml.even_step<span class="p">()</span>
<span class="lineno">19 </span><span class="o">>>></span> bml.lattice
<span class="lineno">20 </span><span class="kt">array</span><span class="p">([[</span><span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">],</span>
<span class="lineno">21 </span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">],</span>
<span class="lineno">22 </span> <span class="p">[</span><span class="m">0</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">],</span>
<span class="lineno">23 </span> <span class="p">[</span><span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">2</span><span class="p">],</span>
<span class="lineno">24 </span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">]])</span></code></pre></figure><p>Please note that although we have imported <code>numpy</code> in <span class="caps">BML</span>.py, we import it again in the code above in order to set the random seed. If we change the line to <code>from BML import *</code>, we don’t need to import <code>numpy</code> again. But it is not recommended to <code>import *</code> from a module.</p>
<h2 id="parallelism">Embarrassingly parallelism in R/Python</h2>
<p>According to the explanation of wikipedia<sup class="footnote" id="fnr13"><a href="#fn13">13</a></sup>, single-threading is the processing of one command at a time, and its opposite is multithreading. A process is the instance of a computer program executed by one or many threads<sup class="footnote" id="fnr14"><a href="#fn14">14</a></sup>. Multithreading is not the same as parallelism. In a single processor environment, the processor can switch execution between threads, which results in concurrent execution. However, it is possible a process with multithreads runs on on a multiprocessor environment and each of the threads on a separate processor, which results in parallel execution.</p>
<p>Both R and Python are single-threaded. In Python, there is a <code>threading</code> package<sup class="footnote" id="fnr15"><a href="#fn15">15</a></sup>, which supports multithreading on a single core. It may suit some specific tasks. For example, in web scraping multithreading on a single core may be able to speed up the program if the download time is in excess of the <span class="caps">CPU</span> processing time.</p>
<p>Now let’s talk about embarrassingly parallelism by multi-processing. Embarrassingly parallel problem is one where little or no effort is needed to separate the problem into a number of parallel tasks<sup class="footnote" id="fnr16"><a href="#fn16">16</a></sup>. In R there are various packages supporting multi-processing on multiple <span class="caps">CPU</span> cores, for example, the <code>parallel</code> package, which is my favorite one. In Python, there are also some available modules, such as <code>multiprocessing</code>, <code>joblib</code> and <code>concurrent.futures</code>. Let’s see an application of embarrassingly parallelism to calculate $\pi$ using Monte Carlo simulation<sup class="footnote" id="fnr17"><a href="#fn17">17</a></sup>.</p>
<appname>Application – Monte Carlo simulation to estimate $\pi$ via parallelization</appname>
<blockquote class="appquote">
<p>
<p>Monte Carlo simulation provides a simple and straightforward way to estimate $\pi$. We know the area of a circle with radius 1 is just $\pi$. Thus, we can convert the original problem of $\pi$ calculation to a new problem, i.e., how to calculate the area of a circle with radius 1. We also know the area of a square with side length 2 is equal to 4. Thus, $\pi$ can be calculated as 4$r_{c/s}$ where $r_{c/s}$ denotes the ratio of areas of a circle with radius 1 and a square with side length 2. Now the problem is how to calculate the ratio $r_{c/s}$? When we randomly throw $n$ points into the square and $m$ of these pionts fall into the inscribed circle, then we can estimate the ratio as $m/n$. As a result, a natural estimate of $\pi$ is $4m/n$. This problem is an embarrassingly parallel problem by its nature. Let’s see how we implement the idea in R/Python.</p>
</p>
</blockquote>
<figure class="text-center">
<img src="figures/circle.png" alt="Generate points within a square and count how many times these points fall into the inscribed circle" style="display: block;margin: auto;" width="35%">
<figcaption class="centerfigcaption">Generate points within a square and count how many times these points fall into the inscribed circle</figcaption>
</figure>
<language>R</language>
<p>chapter2/pi.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="kn">library</span><span class="p">(</span>parallel<span class="p">)</span>
<span class="lineno"> 2 </span>count_inside_point <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">){</span>
<span class="lineno"> 3 </span> m <span class="o">=</span> <span class="m">0</span>
<span class="lineno"> 4 </span> <span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> <span class="m">1</span><span class="o">:</span>n<span class="p">){</span>
<span class="lineno"> 5 </span> p_x <span class="o">=</span> runif<span class="p">(</span><span class="m">1</span><span class="p">,</span> <span class="m">-1</span><span class="p">,</span> <span class="m">1</span><span class="p">)</span>
<span class="lineno"> 6 </span> p_y <span class="o">=</span> runif<span class="p">(</span><span class="m">1</span><span class="p">,</span> <span class="m">-1</span><span class="p">,</span> <span class="m">1</span><span class="p">)</span>
<span class="lineno"> 7 </span> <span class="kr">if</span> <span class="p">(</span>p_x<span class="o">^</span><span class="m">2</span> <span class="o">+</span> p_y<span class="o">^</span><span class="m">2</span> <span class="o"><=</span><span class="m">1</span><span class="p">){</span>
<span class="lineno"> 8 </span> m <span class="o">=</span> m<span class="m">+1</span>
<span class="lineno"> 9 </span> <span class="p">}</span>
<span class="lineno">10 </span> <span class="p">}</span>
<span class="lineno">11 </span> m
<span class="lineno">12 </span><span class="p">}</span>
<span class="lineno">13 </span>
<span class="lineno">14 </span><span class="c1"># now let's use the mcapply for parallelization</span>
<span class="lineno">15 </span>generate_points_parallel <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">){</span>
<span class="lineno">16 </span> <span class="c1"># detectCores() returns the number of cores available</span>
<span class="lineno">17 </span> <span class="c1"># we assign the task to each core</span>
<span class="lineno">18 </span> <span class="kp">unlist</span><span class="p">(</span>mclapply<span class="p">(</span>X <span class="o">=</span> <span class="kp">rep</span><span class="p">(</span>n <span class="o">%/%</span> detectCores<span class="p">(),</span> detectCores<span class="p">()),</span>FUN<span class="o">=</span>count_inside_point<span class="p">))</span>
<span class="lineno">19 </span><span class="p">}</span>
<span class="lineno">20 </span>
<span class="lineno">21 </span><span class="c1"># now let's use vectorization</span>
<span class="lineno">22 </span>generate_points_vectorized <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">){</span>
<span class="lineno">23 </span> p <span class="o">=</span> <span class="kt">array</span><span class="p">(</span>runif<span class="p">(</span>n<span class="o">*</span><span class="m">2</span><span class="p">,</span><span class="m">-1</span><span class="p">,</span><span class="m">1</span><span class="p">),</span> <span class="kt">c</span><span class="p">(</span>n<span class="p">,</span><span class="m">2</span><span class="p">))</span>
<span class="lineno">24 </span> <span class="kp">sum</span><span class="p">((</span>p<span class="p">[,</span><span class="m">1</span><span class="p">]</span><span class="o">^</span><span class="m">2</span><span class="o">+</span>p<span class="p">[,</span><span class="m">2</span><span class="p">]</span><span class="o">^</span><span class="m">2</span><span class="p">)</span><span class="o"><=</span><span class="m">1</span><span class="p">)</span>
<span class="lineno">25 </span><span class="p">}</span>
<span class="lineno">26 </span>
<span class="lineno">27 </span>pi_naive <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">)</span> <span class="kp">cat</span><span class="p">(</span><span class="s">'naive: pi -'</span><span class="p">,</span> <span class="m">4</span><span class="o">*</span>count_inside_point<span class="p">(</span>n<span class="p">)</span><span class="o">/</span>n<span class="p">,</span> <span class="s">'\n'</span><span class="p">)</span>
<span class="lineno">28 </span>
<span class="lineno">29 </span>pi_parallel <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">)</span> <span class="kp">cat</span><span class="p">(</span><span class="s">'parallel: pi -'</span><span class="p">,</span> <span class="m">4</span><span class="o">*</span><span class="kp">sum</span><span class="p">(</span>generate_points_parallel<span class="p">(</span>n<span class="p">))</span><span class="o">/</span>n<span class="p">,</span> <span class="s">'\n'</span><span class="p">)</span>
<span class="lineno">30 </span>
<span class="lineno">31 </span>pi_vectorized <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">)</span> <span class="kp">cat</span><span class="p">(</span><span class="s">'vectorized: pi -'</span><span class="p">,</span> <span class="m">4</span><span class="o">*</span><span class="kp">sum</span><span class="p">(</span>generate_points_vectorized<span class="p">(</span>n<span class="p">))</span><span class="o">/</span>n<span class="p">,</span> <span class="s">'\n'</span><span class="p">)</span></code></pre></figure></p>
<p>In the above R code snippet, we use the function <code>mclapply</code> which is not currently available on some operation systems<sup class="footnote" id="fnr18"><a href="#fn18">18</a></sup>. When it is not available, we may consider to use <code>parLapply</code> instead.</p>
<language>Python</language>
<p>chapter2/pi.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="c1"># now let's try the parallel approach</span>
<span class="lineno"> 2 </span><span class="c1"># each process uses the same seed, which is not desired</span>
<span class="lineno"> 3 </span><span class="k">def</span> <span class="nf">generate_points_parallel</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno"> 4 </span> <span class="n">pool</span> <span class="o">=</span> <span class="n">mp</span><span class="o">.</span><span class="n">Pool</span><span class="p">()</span>
<span class="lineno"> 5 </span> <span class="c1"># we ask each process to generate n//mp.cpu_count() points</span>
<span class="lineno"> 6 </span> <span class="k">return</span> <span class="n">pool</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">count_inside_point</span><span class="p">,</span> <span class="p">[</span><span class="n">n</span><span class="o">//</span><span class="n">mp</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">()]</span><span class="o">*</span><span class="n">mp</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">())</span>
<span class="lineno"> 7 </span>
<span class="lineno"> 8 </span><span class="c1"># set seed for each process</span>
<span class="lineno"> 9 </span><span class="c1"># first, let's define a helper function</span>
<span class="lineno">10 </span><span class="k">def</span> <span class="nf">helper</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
<span class="lineno">11 </span> <span class="n">n</span><span class="p">,</span> <span class="n">s</span> <span class="o">=</span> <span class="n">args</span>
<span class="lineno">12 </span> <span class="n">seed</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
<span class="lineno">13 </span> <span class="k">return</span> <span class="n">count_inside_point</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
<span class="lineno">14 </span>
<span class="lineno">15 </span><span class="k">def</span> <span class="nf">generate_points_parallel_set_seed</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">16 </span> <span class="n">pool</span> <span class="o">=</span> <span class="n">mp</span><span class="o">.</span><span class="n">Pool</span><span class="p">()</span> <span class="c1"># we can also specify the number of processes by Pool(number)</span>
<span class="lineno">17 </span> <span class="c1"># we ask each process to generate n//mp.cpu_count() points</span>
<span class="lineno">18 </span> <span class="k">return</span> <span class="n">pool</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">helper</span><span class="p">,</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">([</span><span class="n">n</span><span class="o">//</span><span class="n">mp</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">()]</span><span class="o">*</span><span class="n">mp</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">(),</span> <span class="nb">range</span><span class="p">(</span><span class="n">mp</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">()))))</span>
<span class="lineno">19 </span>
<span class="lineno">20 </span><span class="c1"># another optimization via vectorization</span>
<span class="lineno">21 </span><span class="k">def</span> <span class="nf">generate_points_vectorized</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">22 </span> <span class="n">p</span> <span class="o">=</span> <span class="n">uniform</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="p">(</span><span class="n">n</span><span class="p">,</span><span class="mi">2</span><span class="p">))</span>
<span class="lineno">23 </span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="o"><=</span> <span class="mi">1</span><span class="p">)</span>
<span class="lineno">24 </span>
<span class="lineno">25 </span><span class="k">def</span> <span class="nf">pi_naive</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">26 </span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'pi: {count_inside_point(n)/n*4:.6f}'</span><span class="p">)</span>
<span class="lineno">27 </span>
<span class="lineno">28 </span><span class="k">def</span> <span class="nf">pi_parallel</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">29 </span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'pi: {sum(generate_points_parallel_set_seed(n))/n*4:.6f}'</span><span class="p">)</span>
<span class="lineno">30 </span>
<span class="lineno">31 </span><span class="k">def</span> <span class="nf">pi_vectorized</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">32 </span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'pi: {generate_points_vectorized(n)/n*4:.6f}'</span><span class="p">)</span></code></pre></figure></p>
<p>In the above Python code snippet, we defined a function <code>generate_points_parallel</code> which returns a list of number of inside points. But the problem of this function is that each process in the pool shares the same random state. As a result, we will obtain the a list of duplicated numbers by calling this function. To overcome the issue, we defined another function <code>generate_points_parallel_set_seed</code>. Let’s see the results of our estimation for $\pi$ running on a laptop.</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">'pi.R'</span><span class="p">)</span>
<span class="lineno"> 2 </span><span class="o">></span> <span class="kp">system.time</span><span class="p">(</span>pi_naive<span class="p">(</span><span class="m">1e6</span><span class="p">))</span>
<span class="lineno"> 3 </span>naive<span class="o">:</span> <span class="kc">pi</span> <span class="o">-</span> <span class="m">3.144592</span>
<span class="lineno"> 4 </span> user system elapsed
<span class="lineno"> 5 </span> <span class="m">8.073</span> <span class="m">1.180</span> <span class="m">9.333</span>
<span class="lineno"> 6 </span><span class="o">></span> <span class="kp">system.time</span><span class="p">(</span>pi_parallel<span class="p">(</span><span class="m">1e6</span><span class="p">))</span>
<span class="lineno"> 7 </span>parallel<span class="o">:</span> <span class="kc">pi</span> <span class="o">-</span> <span class="m">3.1415</span>
<span class="lineno"> 8 </span> user system elapsed
<span class="lineno"> 9 </span> <span class="m">4.107</span> <span class="m">0.560</span> <span class="m">4.833</span>
<span class="lineno">10 </span><span class="o">></span> <span class="kp">system.time</span><span class="p">(</span>pi_vectorized<span class="p">(</span><span class="m">1e6</span><span class="p">))</span>
<span class="lineno">11 </span>vectorized<span class="o">:</span> <span class="kc">pi</span> <span class="o">-</span> <span class="m">3.141224</span>
<span class="lineno">12 </span> user system elapsed
<span class="lineno">13 </span> <span class="m">0.180</span> <span class="m">0.031</span> <span class="m">0.214</span> </code></pre></figure><language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="o">>>></span> <span class="kn">import</span> <span class="nn">timeit</span>
<span class="lineno"> 2 </span><span class="o">>>></span> <span class="kn">from</span> <span class="nn">pi</span> <span class="k">import</span> <span class="n">pi_naive</span><span class="p">,</span> <span class="n">pi_parallel</span><span class="p">,</span> <span class="n">pi_vectorized</span>
<span class="lineno"> 3 </span><span class="o">>>></span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'naive - {timeit.timeit("pi_naive(1000000)", setup="from __main__ import pi_naive",number=1):.6f} seconds'</span><span class="p">)</span>
<span class="lineno"> 4 </span><span class="n">pi</span><span class="p">:</span> <span class="mf">3.141056</span>
<span class="lineno"> 5 </span><span class="n">naive</span> <span class="o">-</span> <span class="mf">4.363822</span> <span class="n">seconds</span>
<span class="lineno"> 6 </span><span class="o">>>></span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'parallel - {timeit.timeit("pi_parallel(1000000)", setup="from __main__ import pi_parallel",number=1):.6f} seconds'</span><span class="p">)</span>
<span class="lineno"> 7 </span><span class="n">pi</span><span class="p">:</span> <span class="mf">3.141032</span>
<span class="lineno"> 8 </span><span class="n">parallel</span> <span class="o">-</span> <span class="mf">2.204697</span> <span class="n">seconds</span>
<span class="lineno"> 9 </span><span class="o">>>></span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'vectorized - {timeit.timeit("pi_vectorized(1000000)", setup="from __main__ import pi_vectorized",number=1):.6f} seconds'</span><span class="p">)</span>
<span class="lineno">10 </span><span class="n">pi</span><span class="p">:</span> <span class="mf">3.139936</span>
<span class="lineno">11 </span><span class="n">vectorized</span> <span class="o">-</span> <span class="mf">0.148950</span> <span class="n">seconds</span></code></pre></figure><p>We see the winner in this example is vectorization, and the parallel solution is better than the naive solution. However, when the problem cannot be vectorized we may use parallelization to achieve better performance.</p>
<h2 id="evaluation">Evaluation strategy</h2>
<p>R is a lazy programming language since it uses lazy evaluation by default<sup class="footnote" id="fnr19"><a href="#fn19">19</a></sup>. Lazy evaluation strategy delays the evaluation of an expression until its value is needed. When an expression in R is evaluated, it follows an outermost reduction order in general. But Python is not a lazy programming language and the expression in Python follows an innermost reduction order.</p>
<p>The code snippets below illustrate the innermost reduction vs. outermost reduction.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="o">></span> divide <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">,</span> y<span class="p">)</span> <span class="p">{</span><span class="kp">stopifnot</span><span class="p">(</span>y<span class="o">!=</span><span class="m">0</span><span class="p">);</span> <span class="kr">return</span><span class="p">(</span>x<span class="o">/</span>y<span class="p">)}</span>
<span class="lineno">2 </span><span class="o">></span> power <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">,</span> p<span class="p">){</span><span class="kr">if</span> <span class="p">(</span>p<span class="o">==</span><span class="m">0</span><span class="p">)</span> <span class="m">1</span> <span class="kr">else</span> x<span class="o">^</span>p<span class="p">}</span>
<span class="lineno">3 </span><span class="o">></span> power<span class="p">(</span>divide<span class="p">(</span><span class="m">1</span><span class="p">,</span><span class="m">0</span><span class="p">),</span><span class="m">2</span><span class="p">)</span>
<span class="lineno">4 </span>Error <span class="kr">in</span> divide<span class="p">(</span><span class="m">1</span><span class="p">,</span> <span class="m">0</span><span class="p">)</span> <span class="o">:</span> y <span class="o">!=</span> <span class="m">0</span> is not <span class="kc">TRUE</span>
<span class="lineno">5 </span><span class="o">></span> power<span class="p">(</span>divide<span class="p">(</span><span class="m">1</span><span class="p">,</span><span class="m">0</span><span class="p">),</span><span class="m">0</span><span class="p">)</span>
<span class="lineno">6 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">1</span></code></pre></figure></div>
<div class="coderight">
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="o">>>></span> <span class="k">def</span> <span class="nf">divide</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span> <span class="k">return</span> <span class="n">x</span><span class="o">/</span><span class="n">y</span>
<span class="lineno">2 </span><span class="o">>>></span> <span class="k">def</span> <span class="nf">power</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">p</span><span class="p">):</span> <span class="k">return</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">p</span><span class="o">==</span><span class="mi">0</span> <span class="k">else</span> <span class="n">x</span><span class="o">**</span><span class="n">p</span>
<span class="lineno">3 </span><span class="o">>>></span> <span class="n">power</span><span class="p">(</span><span class="n">divide</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">),</span><span class="mi">0</span><span class="p">)</span>
<span class="lineno">4 </span><span class="n">Traceback</span> <span class="p">(</span><span class="n">most</span> <span class="n">recent</span> <span class="n">call</span> <span class="n">last</span><span class="p">):</span>
<span class="lineno">5 </span> <span class="n">File</span> <span class="s2">"<stdin>"</span><span class="p">,</span> <span class="n">line</span> <span class="mi">1</span><span class="p">,</span> <span class="ow">in</span> <span class="o"><</span><span class="n">module</span><span class="o">></span>
<span class="lineno">6 </span> <span class="n">File</span> <span class="s2">"<stdin>"</span><span class="p">,</span> <span class="n">line</span> <span class="mi">1</span><span class="p">,</span> <span class="ow">in</span> <span class="n">divide</span>
<span class="lineno">7 </span><span class="ne">ZeroDivisionError</span><span class="p">:</span> <span class="n">division</span> <span class="n">by</span> <span class="n">zero</span></code></pre></figure></div>
</div>
<p>Because of the outermost reduction order, the R code snippet evaluate the function <code>power</code> first and since if second argument is 0 the first argument is not required to evaluate. Thus, the function call returns 1. But the Python code snippet first evaluates the function call of <code>divide</code> and an exception is raised because of division by zero.</p>
<p>Although Python is an eager language, it is possible to simulate the lazy evaluation behavior. For example, the code inside a generator<sup class="footnote" id="fnr20"><a href="#fn20">20</a></sup> is evaluated when the generator is consumed but not evaluated when it is created. We can create a sequence of Fibonacci numbers with the help of <code>generator</code>.</p>
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="o">>>></span> <span class="k">def</span> <span class="nf">fib</span><span class="p">():</span>
<span class="lineno"> 2 </span><span class="o">...</span> <span class="s1">'''</span>
<span class="lineno"> 3 </span><span class="s1">... a generator to create Fibonacci numbers less than 10</span>
<span class="lineno"> 4 </span><span class="s1">... '''</span>
<span class="lineno"> 5 </span><span class="o">...</span> <span class="n">a</span><span class="p">,</span> <span class="n">b</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span>
<span class="lineno"> 6 </span><span class="o">...</span> <span class="k">while</span> <span class="n">a</span><span class="o"><</span><span class="mi">10</span><span class="p">:</span>
<span class="lineno"> 7 </span><span class="o">...</span> <span class="n">a</span><span class="p">,</span> <span class="n">b</span> <span class="o">=</span> <span class="n">b</span><span class="p">,</span> <span class="n">a</span><span class="o">+</span><span class="n">b</span>
<span class="lineno"> 8 </span><span class="o">...</span> <span class="k">yield</span> <span class="n">a</span>
<span class="lineno"> 9 </span><span class="o">...</span>
<span class="lineno">10 </span><span class="o">>>></span> <span class="n">f</span> <span class="o">=</span> <span class="n">fib</span><span class="p">()</span>
<span class="lineno">11 </span><span class="o">>>></span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="lineno">12 </span><span class="o"><</span><span class="n">generator</span> <span class="nb">object</span> <span class="n">fib</span> <span class="n">at</span> <span class="mh">0x116dfc570</span><span class="o">></span>
<span class="lineno">13 </span><span class="o">>>></span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">f</span><span class="p">:</span> <span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
<span class="lineno">14 </span><span class="o">...</span>
<span class="lineno">15 </span><span class="mi">1</span>
<span class="lineno">16 </span><span class="mi">1</span>
<span class="lineno">17 </span><span class="mi">2</span>
<span class="lineno">18 </span><span class="mi">3</span>
<span class="lineno">19 </span><span class="mi">5</span>
<span class="lineno">20 </span><span class="mi">8</span>
<span class="lineno">21 </span><span class="mi">13</span>
<span class="lineno">22 </span><span class="o">>>></span> </code></pre></figure><p>In the code snippet above, we create a generator which generates the sequence of Fibonacci numbers less than 10. When the generator is created, the sequence is not generated immediately. When we consume the generator in a loop, each element is then generated as needed. It is also common to consume the generator with the <code>next</code> function.</p>
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="o">>>></span> <span class="n">f</span> <span class="o">=</span> <span class="n">fib</span><span class="p">()</span>
<span class="lineno"> 2 </span><span class="o">>>></span> <span class="nb">next</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="lineno"> 3 </span><span class="mi">1</span>
<span class="lineno"> 4 </span><span class="o">>>></span> <span class="nb">next</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="lineno"> 5 </span><span class="mi">1</span>
<span class="lineno"> 6 </span><span class="o">>>></span> <span class="nb">next</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="lineno"> 7 </span><span class="mi">2</span>
<span class="lineno"> 8 </span><span class="o">>>></span> <span class="nb">next</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="lineno"> 9 </span><span class="mi">3</span>
<span class="lineno">10 </span><span class="o">>>></span> <span class="nb">next</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="lineno">11 </span><span class="mi">5</span></code></pre></figure><h2 id="cpp">Speed up with C/C++ in R/Python</h2>
<p>The purpose of this section is not to teach C/C++. If you know how to write C/C++ then it may help to improve the performance of R/Python program.</p>
<p>It is recommended to use vectorization technique to speed up your program whenever possible. But what if it’s infeasible to vectorize the algorithm? One potential option is to use C/C++ in the R/Python code. According to my limited experience, it is much easier to use C/C++ in R with the help of <code>Rcpp</code> package than in Python. In Python, more options are available but not as straightforward as the solution of <code>Rcpp</code>. In this section, let’s use <code>Cython</code><sup class="footnote" id="fnr21"><a href="#fn21">21</a></sup>. Actually, <code>Cython</code> itself is a programming language written in Python and C. In <code>Rcpp</code> we can write C++ code directly and use it in native R code. With the help of <code>Cython</code>, we can write python-like code which is able to be compiled to C or C++ and automatically wrapped into python-importable modules. <code>Cython</code> could also wrap independent C or C++ code into python-importable modules.</p>
<p>Let’s see how to calculate Fibonacci numbers with these tools. The first step is to install <code>Rcpp</code> package in R and the <code>Cython</code> module (use <code>pip</code>) in Python. Once they are installed, we can write the code in C++ directly for R. As for <code>Cython</code>, we write the python-like code which would be compiled to C/C++ later.</p>
<div class="codewrapper">
<div class="codeleft">
<language>C</language>
<p>chapter2/Fibonacci.cpp<br />
<figure class="highlight"><pre><code class="language-c" data-lang="c"><span></span><span class="lineno"> 1 </span><span class="cp">#include</span> <span class="cpf"><Rcpp.h></span><span class="cp"></span>
<span class="lineno"> 2 </span><span class="n">using</span> <span class="n">namespace</span> <span class="n">Rcpp</span><span class="p">;</span>
<span class="lineno"> 3 </span>
<span class="lineno"> 4 </span><span class="c1">// [[Rcpp::export]]</span>
<span class="lineno"> 5 </span><span class="kt">int</span> <span class="nf">Fibonacci</span><span class="p">(</span><span class="kt">int</span> <span class="n">n</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno"> 6 </span> <span class="k">if</span> <span class="p">(</span><span class="n">n</span><span class="o">==</span><span class="mi">1</span> <span class="o">&&</span> <span class="n">n</span><span class="o">==</span><span class="mi">2</span><span class="p">){</span>
<span class="lineno"> 7 </span> <span class="k">return</span> <span class="mi">1</span><span class="p">;</span>
<span class="lineno"> 8 </span> <span class="p">}</span>
<span class="lineno"> 9 </span> <span class="k">return</span> <span class="n">Fibonacci</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">+</span><span class="n">Fibonacci</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">);</span>
<span class="lineno">10 </span><span class="p">}</span></code></pre></figure></p>
</div>
<div class="coderight">
<language>Cython</language>
<p>chapter2/Fibonacci.pyx<br />
<figure class="highlight"><pre><code class="language-cython" data-lang="cython"><span></span><span class="lineno"> 1 </span><span class="c">#!python</span>
<span class="lineno"> 2 </span><span class="c">#cython: language_level=3</span>
<span class="lineno"> 3 </span>
<span class="lineno"> 4 </span><span class="k">cdef</span> <span class="kt">int</span> <span class="nf">Fibonacci_c</span><span class="p">(</span><span class="nb">int</span> <span class="n">n</span><span class="p">):</span>
<span class="lineno"> 5 </span> <span class="k">if</span> <span class="n">n</span><span class="o">==</span><span class="mf">1</span> <span class="ow">or</span> <span class="n">n</span><span class="o">==</span><span class="mf">2</span><span class="p">:</span>
<span class="lineno"> 6 </span> <span class="k">return</span> <span class="mf">1</span>
<span class="lineno"> 7 </span> <span class="k">return</span> <span class="n">Fibonacci_c</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mf">1</span><span class="p">)</span> <span class="o">+</span> <span class="n">Fibonacci_c</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mf">2</span><span class="p">)</span>
<span class="lineno"> 8 </span>
<span class="lineno"> 9 </span><span class="k">def</span> <span class="nf">Fibonacci_static</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">10 </span> <span class="k">return</span> <span class="n">Fibonacci_c</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
<span class="lineno">11 </span>
<span class="lineno">12 </span><span class="k">def</span> <span class="nf">Fibonacci</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">13 </span> <span class="k">if</span> <span class="n">n</span><span class="o">==</span><span class="mf">1</span> <span class="ow">or</span> <span class="n">n</span><span class="o">==</span><span class="mf">2</span><span class="p">:</span>
<span class="lineno">14 </span> <span class="k">return</span> <span class="mf">1</span>
<span class="lineno">15 </span> <span class="k">return</span> <span class="n">Fibonacci</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mf">1</span><span class="p">)</span> <span class="o">+</span> <span class="n">Fibonacci</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mf">2</span><span class="p">)</span></code></pre></figure></p>
</div>
</div>
<p>It’s worth noting the extension of the <code>Cython</code> file above is <code>pyx</code>, not <code>py</code>. And in <code>Fibonacci.pyx</code>, the function we defined is quite following the native Python syntax. But in <code>Cython</code> we can add static typing declarations which are often helpful to improve the performance, although not mandatory. The keyword <code>cdef</code> makes the function <code>Fibonacci_c</code> invisible to Python and thus we have to define the <code>Fibonacci_static</code> function as a wrapper which can be imported into Python. The function <code>Fibonacci</code> is not static typed for benchmarking.</p>
<p>Let’s also implement the same functions in native R/Python for benchmarking purpose.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<p>chapter2/Fibonacci.R<br />
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span>Fibonacci_native <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>n<span class="p">){</span>
<span class="lineno">2 </span> <span class="kr">if</span> <span class="p">(</span>n<span class="o">==</span><span class="m">1</span> <span class="o">||</span> n<span class="o">==</span><span class="m">2</span><span class="p">){</span>
<span class="lineno">3 </span> <span class="m">1</span>
<span class="lineno">4 </span> <span class="p">}</span><span class="kr">else</span> <span class="p">{</span>
<span class="lineno">5 </span> Fibonacci_native<span class="p">(</span>n<span class="m">-1</span><span class="p">)</span> <span class="o">+</span> Fibonacci_native<span class="p">(</span>n<span class="m">-2</span><span class="p">)</span>
<span class="lineno">6 </span> <span class="p">}</span>
<span class="lineno">7 </span><span class="p">}</span></code></pre></figure></p>
</div>
<div class="coderight">
<language>Python</language>
<p>chapter2/Fibonacci.py<br />
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="k">def</span> <span class="nf">Fibonacci_native</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="lineno">2 </span> <span class="k">if</span> <span class="n">n</span><span class="o">==</span><span class="mi">1</span> <span class="ow">or</span> <span class="n">n</span><span class="o">==</span><span class="mi">2</span><span class="p">:</span>
<span class="lineno">3 </span> <span class="k">return</span> <span class="mi">1</span>
<span class="lineno">4 </span> <span class="k">return</span> <span class="n">Fibonacci_native</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="n">Fibonacci_native</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">)</span></code></pre></figure></p>
</div>
</div>
<p>Now let’s compare the performance of different implementations.</p>
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="o">></span> <span class="kn">library</span><span class="p">(</span>microbenchmark<span class="p">)</span>
<span class="lineno"> 2 </span><span class="o">></span> <span class="kn">library</span><span class="p">(</span>Rcpp<span class="p">)</span>
<span class="lineno"> 3 </span><span class="o">></span> <span class="kn">source</span><span class="p">(</span><span class="s">"Fibonacci_native.R"</span><span class="p">)</span>
<span class="lineno"> 4 </span><span class="o">></span> sourceCpp<span class="p">(</span><span class="s">'Fibonacci.cpp'</span><span class="p">)</span>
<span class="lineno"> 5 </span><span class="o">></span> <span class="c1"># the native implementation in R</span>
<span class="lineno"> 6 </span><span class="o">></span> microbenchmark<span class="p">(</span>Fibonacci_native<span class="p">(</span><span class="m">20</span><span class="p">),</span> times<span class="o">=</span><span class="m">1000</span><span class="p">)</span>
<span class="lineno"> 7 </span>Unit<span class="o">:</span> milliseconds
<span class="lineno"> 8 </span> expr min lq mean median uq max neval
<span class="lineno"> 9 </span> Fibonacci_native<span class="p">(</span><span class="m">20</span><span class="p">)</span> <span class="m">3.917138</span> <span class="m">4.056468</span> <span class="m">4.456824</span> <span class="m">4.190078</span> <span class="m">4.462815</span> <span class="m">26.30846</span> <span class="m">1000</span>
<span class="lineno">10 </span><span class="o">></span> the C<span class="o">++</span> implementation
<span class="lineno">11 </span><span class="o">></span> microbenchmark<span class="p">(</span>Fibonacci<span class="p">(</span><span class="m">20</span><span class="p">),</span> times<span class="o">=</span><span class="m">1000</span><span class="p">)</span>
<span class="lineno">12 </span>Unit<span class="o">:</span> microseconds
<span class="lineno">13 </span> expr min lq mean median uq max neval
<span class="lineno">14 </span> Fibonacci<span class="p">(</span><span class="m">20</span><span class="p">)</span> <span class="m">14.251</span> <span class="m">14.482</span> <span class="m">15.81708</span> <span class="m">14.582</span> <span class="m">14.731</span> <span class="m">775.095</span> <span class="m">1000</span></code></pre></figure><p>The native implementation in R is much slower the C++ implementation (see the difference of units).</p>
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno"> 1 </span><span class="o">>>></span> <span class="kn">import</span> <span class="nn">pyximport</span>
<span class="lineno"> 2 </span><span class="o">>>></span> <span class="n">pyximport</span><span class="o">.</span><span class="n">install</span><span class="p">()</span>
<span class="lineno"> 3 </span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="o"><</span><span class="n">pyximport</span><span class="o">.</span><span class="n">pyximport</span><span class="o">.</span><span class="n">PyxImporter</span> <span class="nb">object</span> <span class="n">at</span> <span class="mh">0x1056f9f98</span><span class="o">></span><span class="p">)</span>
<span class="lineno"> 4 </span><span class="o">>>></span> <span class="kn">from</span> <span class="nn">Fibonacci</span> <span class="k">import</span> <span class="n">Fibonacci_static</span><span class="p">,</span> <span class="n">Fibonacci</span>
<span class="lineno"> 5 </span><span class="o">>>></span> <span class="kn">from</span> <span class="nn">Fibonacci_native</span> <span class="k">import</span> <span class="n">Fibonacci_native</span>
<span class="lineno"> 6 </span><span class="o">>>></span> <span class="kn">import</span> <span class="nn">timeit</span>
<span class="lineno"> 7 </span><span class="o">>>></span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'{timeit.timeit("Fibonacci_native(20)", setup="from __main__ import Fibonacci_native",number=1000):.6f} seconds'</span><span class="p">)</span>
<span class="lineno"> 8 </span><span class="mf">1.927359</span> <span class="n">seconds</span>
<span class="lineno"> 9 </span><span class="o">>>></span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'{timeit.timeit("Fibonacci(20)", setup="from __main__ import Fibonacci",number=1000):.6f} seconds'</span><span class="p">)</span>
<span class="lineno">10 </span><span class="mf">0.316726</span> <span class="n">seconds</span>
<span class="lineno">11 </span><span class="o">>>></span> <span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'{timeit.timeit("Fibonacci_static(20)", setup="from __main__ import Fibonacci_static",number=1000):.6f} seconds'</span><span class="p">)</span>
<span class="lineno">12 </span><span class="mf">0.012741</span> <span class="n">seconds</span></code></pre></figure><p>The results show the static typed implementation is the fastest, and the native implementation in pure Python is the slowest. Again the time measured by <code>timeit.timeit</code> is the total time to run the main statement 1000 times. The average time per run of <code>Fibonacci_static</code> function is close to the average time per run of <code>Fibonacci</code> in R.</p>
<h2 id="fp">A first impression of functional programming</h2>
<p>Functional (programming) languages define programs as mathematical functions and treat them as first-class2]. In pure functional programming, a function takes input and returns output without making any side effect. A side effect means a state variable outside the function is modified. For example, printing the value of a variable inside a function is a side effect.</p>
<p>Both R and Python are not purely functional languages per se, although R behaves more functional than Python<sup class="footnote" id="fnr23"><a href="#fn23">23</a></sup>. If you wonder why the <code>return</code> function could be ignored to return a value in an R function, now you get the explanation – a function should return the output automatically from a FP perspective.</p>
<p>I don’t think it makes any sense to debate on if we should choose <span class="caps">OOP</span> or FP using R/Python. And both languages support multiple programming paradigms (FP, <span class="caps">OOP</span> etc.). If you want to do purist FP, neither R nor Python would be the best choice. For example, purist FP should avoid loop because loop always involves an iteration counter whose value changes over iterations, which is obviously a side effect. Thus, recursion is very important in FP. But tail recursion optimization is not supported in R and Python although it can be implemented via trampoline<sup class="footnote" id="fnr24"><a href="#fn24">24</a></sup>.</p>
<p>Let’s introduce a few tools that could help to get a first impression of FP in R and Python.</p>
<h3>Anonymous function</h3>
<p>We have seen anonymous functions before. Anonymous functions are commonly used in FP. To recap, let’s see how to define a useless anonymous function.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="o">></span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> x<span class="o">*</span>x
<span class="lineno">2 </span><span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> x<span class="o">*</span>x</code></pre></figure></div>
<div class="coderight">
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="o">>>></span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">**</span><span class="mi">2</span>
<span class="lineno">2 </span><span class="o"><</span><span class="n">function</span> <span class="o"><</span><span class="k">lambda</span><span class="o">></span> <span class="n">at</span> <span class="mh">0x1033d5510</span><span class="o">></span></code></pre></figure></div>
</div>
<p>Usually, we define anonymous functions because we want to pass them to other functions. And thus, the functions defined above are useless as they are not passed to any other function and after the definition they are deleted automatically from the memory.</p>
<p>Of course, we can assign names to the anonymous functions – but what is the point to name anonymous functions?</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno">1 </span><span class="o">></span> f1 <span class="o">=</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> x<span class="o">*</span>x
<span class="lineno">2 </span><span class="o">></span> f1
<span class="lineno">3 </span><span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> x<span class="o">*</span>x</code></pre></figure></div>
<div class="coderight">
<language>Python</language>
<figure class="highlight"><pre><code class="language-python3" data-lang="python3"><span></span><span class="lineno">1 </span><span class="o">>>></span> <span class="n">f1</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">**</span><span class="mi">2</span>
<span class="lineno">2 </span><span class="o">>>></span> <span class="n">f1</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
<span class="lineno">3 </span><span class="mi">4</span></code></pre></figure></div>
</div>
<h3>Map</h3>
<p>Map could be used to avoid loops. The first argument to map is a function.</p>
<p>In R, we use <code>Map</code> which is shipped with base R; and in Python we use <code>map</code> which is also a built-in function.</p>
<div class="codewrapper">
<div class="codeleft">
<language>R</language>
<figure class="highlight"><pre><code class="language-r" data-lang="r"><span></span><span class="lineno"> 1 </span><span class="o">></span> <span class="kp">Map</span><span class="p">(</span><span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> x<span class="o">*</span>x<span class="p">,</span> <span class="kt">c</span><span class="p">(</span><span class="m">1</span><span class="o">:</span><span class="m">3</span><span class="p">))</span>
<span class="lineno"> 2 </span><span class="p">[[</span><span class="m">1</span><span class="p">]]</span>
<span class="lineno"> 3 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">1</span>
<span class="lineno"> 4 </span>
<span class="lineno"> 5 </span><span class="p">[[</span><span class="m">2</span><span class="p">]]</span>
<span class="lineno"> 6 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">4</span>
<span class="lineno"> 7 </span>
<span class="lineno"> 8 </span><span class="p">[[</span><span class="m">3</span><span class="p">]]</span>
<span class="lineno"> 9 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">9</span>
<span class="lineno">10 </span><span class="o">></span> <span class="kp">Map</span><span class="p">(</span><span class="kr">function</span><span class="p">(</span>x<span class="p">,</span> y<span class="p">)</span> x<span class="o">*</span>y<span class="p">,</span> <span class="kt">c</span><span class="p">(</span><span class="m">1</span><span class="o">:</span><span class="m">3</span><span class="p">),</span> <span class="kt">c</span><span class="p">(</span><span class="m">4</span><span class="o">:</span><span class="m">6</span><span class="p">))</span>
<span class="lineno">11 </span><span class="p">[[</span><span class="m">1</span><span class="p">]]</span>
<span class="lineno">12 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">4</span>
<span class="lineno">13 </span>
<span class="lineno">14 </span><span class="p">[[</span><span class="m">2</span><span class="p">]]</span>
<span class="lineno">15 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">10</span>
<span class="lineno">16 </span>
<span class="lineno">17 </span><span class="p">[[</span><span class="m">3</span><span class="p">]]</span>
<span class="lineno">18 </span><span class="p">[</span><span class="m">1</span><span class="p">]</span> <span class="m">18</span></code></pre></figure></div>