1+ import time
2+
3+ import matplotlib .pyplot as plt
4+ import numpy as np
5+ import ROOT
6+
7+ # ROOT.EnableImplicitMT()
8+
@ROOT.Numba.Declare(['double', 'int'], 'double')
def pypow_numba(x, y):
    """Return ``x`` raised to the power ``y``.

    The decorator declares this function to ROOT with the signature
    (double, int) -> double; elsewhere in this file it is invoked from an
    RDataFrame expression string as ``Numba::pypow_numba(x, y)``.
    """
    power = x ** y
    return power
12+
def pypow_cppyy(x: float, y: int) -> float:
    """Return ``x`` raised to the power ``y``.

    Plain Python counterpart of ``pypow_numba``; it is handed to
    ``RDataFrame.Define`` directly as a callable rather than by name.
    """
    power = x ** y
    return power
15+
def benchmark(N):
    """Time the two ``Define`` flavours on an ``N``-entry RDataFrame.

    A base dataframe is built with column ``x`` (the entry number cast to
    double) and column ``y`` (the constant 2).  Two derived dataframes then
    compute ``x ** y``: one through the Python callable ``pypow_cppyy``, one
    through the expression string ``Numba::pypow_numba(x, y)``.  Each timed
    region covers both the ``Define`` call and ``Mean(...).GetValue()``.

    Args:
        N: Number of entries of the RDataFrame.

    Returns:
        A dict with keys ``"N"``, ``"mean_numba"``, ``"mean_cppyy"``,
        ``"time_numba"`` and ``"time_cppyy"`` (times in seconds as measured
        with ``time.perf_counter``).
    """
    base_df = (
        ROOT.RDataFrame(N)
        .Define("x", "(double)rdfentry_")
        .Define("y", "2")
    )

    # Python callable passed straight to Define (measured first, as before).
    tick = time.perf_counter()
    df_callable = base_df.Define("pow_cppyy", pypow_cppyy, ["x", "y"])
    mean_cppyy = df_callable.Mean("pow_cppyy").GetValue()
    time_cppyy = time.perf_counter() - tick

    # Numba-declared function referenced by name inside an expression string.
    tick = time.perf_counter()
    df_numba = base_df.Define("pow_numba", "Numba::pypow_numba(x, y)")
    mean_numba = df_numba.Mean("pow_numba").GetValue()
    time_numba = time.perf_counter() - tick

    return {
        "N": N,
        "mean_numba": mean_numba,
        "mean_cppyy": mean_cppyy,
        "time_numba": time_numba,
        "time_cppyy": time_cppyy,
    }
38+
# Entry counts to benchmark, from 10 thousand up to 100 million.
N_values = [
    10_000,
    100_000,
    1_000_000,
    5_000_000,
    10_000_000,
    50_000_000,
    100_000_000,
]
# One result dict per entry count, in the same order as N_values.
results = list(map(benchmark, N_values))

# --- Text summary ------------------------------------------------------------
print("Résultats :)")
header = f"{'N':>10} | {'Numba time (s)':>15} | {'cppyy time (s)':>15} | {'cppyy speedup':>15} | {'Δ Mean':>10} "
print(header)
print("-" * 75)
# NOTE(review): the first (smallest-N) result is left out of the table,
# presumably because it is treated as a warm-up run; it is still plotted below.
for r in results[1:]:
    mean_diff = abs(r["mean_numba"] - r["mean_cppyy"])
    # Speedup is how many times faster the callable path ran than Numba.
    if r["time_cppyy"] > 0:
        cppyy_speedup = r["time_numba"] / r["time_cppyy"]
    else:
        cppyy_speedup = float('inf')
    print(f"{r['N']:>10} | {r['time_numba']:>15.3f} | {r['time_cppyy']:>15.3f} | {cppyy_speedup:>15.2f} | {mean_diff:>10.5f} ")


# --- Plot: execution time versus number of entries ---------------------------
numba_times = [row["time_numba"] for row in results]
cppyy_times = [row["time_cppyy"] for row in results]

plt.figure(figsize=(7, 5))
plt.plot(N_values, numba_times, "-o", label="Numba")
plt.plot(N_values, cppyy_times, "-o", label="cppyy")

plt.xlabel("Number of entries")
plt.ylabel("Execution time (s)")
plt.title("RDataFrame Define(): Numba vs cppyy")
plt.legend()
plt.grid(True)
# NOTE(review): absolute, user-specific output path; the target directory has
# to exist already on the machine running this script.
plt.savefig("/home/siliataider/Documents/root/bench_out/benchmark_numba_vs_cppyy.png")
plt.show()
62+
63+ # N = 5_000_000
64+ # rdf = ROOT.RDataFrame(N).Define("x", "(double)rdfentry_").Define("y", "2.0")
65+
66+ # # --- Numba version --------------------------------------------------------
67+ # @ROOT.Numba.Declare(['double', 'int'], 'double')
68+ # def pypow_numba(x, y):
69+ # return x**y
70+
71+ # start_numba = time.perf_counter()
72+
73+ # rdf_numba = rdf.Define("pow_numba", "Numba::pypow_numba(x, y)")
74+ # mean_numba = rdf_numba.Mean("pow_numba").GetValue()
75+
76+ # end_numba = time.perf_counter()
77+
78+ # print(f"[Numba] Mean: {mean_numba:.5f}, time: {end_numba - start_numba:.3f} s")
79+
80+ # # --- cppyy version --------------------------------------------------------
81+ # def pypow_cppyy(x: float, y: int) -> float:
82+ # return x**y
83+
84+ # start_cppyy = time.perf_counter()
85+
86+ # rdf_cppyy = rdf.Define("pow_cppyy", pypow_cppyy, ["x", "y"])
87+ # mean_cppyy = rdf_cppyy.Mean("pow_cppyy").GetValue()
88+
89+ # end_cppyy = time.perf_counter()
90+
91+ # print(f"[cppyy] Mean: {mean_cppyy:.5f}, time: {end_cppyy - start_cppyy:.3f} s")
92+
93+
94+
95+ import ROOT
96+
# Declare a templated C++ functor to the ROOT interpreter: SquareVecT<T> takes
# an RVec<T> and returns `xvec * xvec`.
ROOT.gInterpreter.Declare("""
#include <ROOT/RVec.hxx>
using namespace ROOT::VecOps;

template <typename T>
struct SquareVecT {
RVec<T> operator()(const RVec<T>& xvec) const {
return xvec * xvec;
}
};
""")

# Instantiate the functor for int elements so it can be passed to Define.
square_vec_int = ROOT.SquareVecT[int]()

# Three-entry dataframe: "xvec" holds {entry, entry + 1, entry + 2} as ints and
# "squared_xvec" is defined by applying the C++ functor to "xvec".
# No action is booked in the lines shown here.
rdf = (
    ROOT.RDataFrame(3)
    .Define("xvec", "ROOT::VecOps::RVec{(int)rdfentry_, (int)rdfentry_ + 1, (int)rdfentry_ + 2}")
    .Define("squared_xvec", square_vec_int, ["xvec"])
)
0 commit comments