2020import sys
2121import timeit
2222import re
23+ import platform
24+ import hashlib
25+ import os
26+ import subprocess
2327
2428import numpy as np
2529import sklearn
26-
# The `itt` module is an optional dependency: remember whether it could be
# imported so that later calls into it can be guarded by this flag.
# Only ImportError is treated as "not installed"; a bare `except` would also
# hide KeyboardInterrupt/SystemExit raised while importing.
try:
    import itt
    itt_module_installed = True
except ImportError:
    itt_module_installed = False
2735
2836def get_dtype (data ):
2937 '''
@@ -159,6 +167,8 @@ def parse_args(parser, size=None, loop_types=(),
159167 parser .add_argument ('--time-method' , type = str , default = 'box_filter' ,
160168 choices = ('box_filter' ),
161169 help = 'Method used for time mesurements' )
170+ parser .add_argument ('--box-filter-measurements-analysis' , type = int , default = 100 ,
171+ help = 'Maximum number of measurements in box filter (for analyzed stage)' )
162172 parser .add_argument ('--box-filter-measurements' , type = int , default = 100 ,
163173 help = 'Maximum number of measurements in box filter' )
164174 parser .add_argument ('--inner-loops' , default = 100 , type = int ,
@@ -167,6 +177,8 @@ def parse_args(parser, size=None, loop_types=(),
167177 parser .add_argument ('--outer-loops' , default = 100 , type = int ,
168178 help = 'Maximum outer loop iterations '
169179 '(we take the min over outer iterations)' )
180+ parser .add_argument ('--time-limit-analysis' , default = 10. , type = float ,
181+ help = 'Target time to spend to benchmark (for analyzed stage)' )
170182 parser .add_argument ('--time-limit' , default = 10. , type = float ,
171183 help = 'Target time to spend to benchmark' )
172184 parser .add_argument ('--goal-outer-loops' , default = 10 ,
@@ -186,6 +198,25 @@ def parse_args(parser, size=None, loop_types=(),
186198 parser .add_argument ('--device' , default = 'none' , type = str ,
187199 choices = ('host' , 'cpu' , 'gpu' , 'none' ),
188200 help = 'Execution context device' )
201+ parser .add_argument ('--emon' , default = False ,
202+ action = 'store_true' ,
203+ help = 'Should emon profiling be started' )
204+ parser .add_argument ('--vtune' , default = False ,
205+ action = 'store_true' ,
206+ help = 'Should vtune profiling be started' )
207+ parser .add_argument ('--psrecord' , default = False ,
208+ action = 'store_true' ,
209+ help = 'Should psrecord profiling be started' )
210+ parser .add_argument ('--ittpy' , default = False ,
211+ action = 'store_true' ,
212+ help = 'Should ittpy domains be integrated' )
213+ parser .add_argument ('--sgx-gramine' , default = False ,
214+ action = 'store_true' ,
215+ help = 'Should benchmark run with Gramine & Intel(R) SGX' )
216+ parser .add_argument ('--flush-caches' , default = False ,
217+ action = 'store_true' ,
218+ help = 'Should benchmark flush CPU caches after each run during measuring' )
219+ parser .add_argument ('--target-stage' , type = str , default = 'default' , help = 'Select target stage for analysis.' )
189220
190221 for data in ['X' , 'y' ]:
191222 for stage in ['train' , 'test' ]:
@@ -201,6 +232,9 @@ def parse_args(parser, size=None, loop_types=(),
201232
202233 params = parser .parse_args ()
203234
235+ if params .ittpy and itt_module_installed :
236+ itt .pause ()
237+
204238 if not params .no_intel_optimized :
205239 try :
206240 from sklearnex import patch_sklearn
@@ -272,18 +306,68 @@ def prepare_daal_threads(num_threads=-1):
272306 return num_threads
273307
274308
275- def measure_function_time (func , * args , params , ** kwargs ):
276- return time_box_filter (func , * args ,
277- n_meas = params .box_filter_measurements ,
278- time_limit = params .time_limit , ** kwargs )
def measure_function_time(func, *args, params, stage, **kwargs):
    """Measure ``func`` by delegating to ``time_box_filter``.

    All positional and keyword arguments are forwarded unchanged, together
    with ``params`` and ``stage``; the value produced by ``time_box_filter``
    is returned as is.
    """
    return time_box_filter(func, *args, params=params, stage=stage, **kwargs)
279312
280313
281- def time_box_filter (func , * args , n_meas , time_limit , ** kwargs ):
def detect_LLC_size(
        cache_size_path='/sys/devices/system/cpu/cpu0/cache/index3/size'):
    """Return the cache size, in bytes, read from a sysfs-style ``size`` file.

    Parameters
    ----------
    cache_size_path : str
        File whose first line holds the cache size, e.g. ``36608K``.
        Defaults to the ``index3`` cache entry of ``cpu0``.

    Returns
    -------
    int
        Cache size in bytes.

    Raises
    ------
    OSError
        If the file cannot be opened (for example, the sysfs entry is absent).
    ValueError
        If the file is empty or its content is not a number with an optional
        ``K``/``M``/``G`` suffix.
    """
    unit_multipliers = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}
    with open(cache_size_path, 'r') as f:
        llc_size_str = f.readline().strip()
    if not llc_size_str:
        raise ValueError(
            'Cache size file is empty: {}'.format(cache_size_path))

    suffix = llc_size_str[-1].upper()
    if suffix in unit_multipliers:
        return int(llc_size_str[:-1]) * unit_multipliers[suffix]
    # No unit suffix: do not chop a digit off the number.
    # NOTE(review): a bare number is assumed to be a byte count - confirm.
    return int(llc_size_str)
319+
320+
def flush_caches():
    """Run a few short KMeans fits on a large array to displace cached data.

    The array is loaded from ``data/flush_data.npy`` when that file exists.
    Otherwise it is generated from random values, sized from
    ``detect_LLC_size()``, and saved to that path for later calls.

    Raises
    ------
    OSError
        If the cache size cannot be read or the data file cannot be
        read or written.
    """
    flush_datafile = 'data/flush_data.npy'
    if os.path.exists(flush_datafile):
        with open(flush_datafile, 'rb') as f:
            data = np.load(f).astype(np.double)
    else:
        # Element count ~= LLC size in bytes (rounded down to a multiple of
        # 8); at 8 bytes per double the array is about eight times the LLC.
        data_size = detect_LLC_size() // 8 * 8
        columns_number = 100
        rows_number = data_size // columns_number
        # np.random.rand already yields float64, so no extra cast is needed.
        data = np.random.rand(rows_number, columns_number)
        # The target directory may not exist yet on a fresh checkout.
        os.makedirs(os.path.dirname(flush_datafile), exist_ok=True)
        with open(flush_datafile, 'wb') as f:
            np.save(f, data)

    # Prefer the sklearnex implementation, fall back to stock scikit-learn.
    try:
        from sklearnex.cluster import KMeans
    except ImportError:
        from sklearn.cluster import KMeans

    iterations_to_flush = 3
    for _ in range(iterations_to_flush):
        # Only the memory traffic of the fit matters; the model is discarded.
        KMeans(max_iter=3, tol=1e-7).fit(data)
341+
342+
343+ def time_box_filter (func , * args , params , stage , ** kwargs ):
344+ flush_caches_flag = params .flush_caches
345+ if params .target_stage != 'default' :
346+ if params .target_stage == stage :
347+ time_limit = params .time_limit_analysis
348+ n_meas = params .box_filter_measurements_analysis
349+ is_the_target_stage = True
350+ else :
351+ time_limit = 0
352+ n_meas = 1
353+ is_the_target_stage = False
354+ else :
355+ time_limit = params .time_limit
356+ n_meas = params .box_filter_measurements
357+ is_the_target_stage = True
358+
282359 times = []
283360 while len (times ) < n_meas :
361+ if flush_caches_flag :
362+ flush_caches ()
363+
364+ if params .ittpy and is_the_target_stage and itt_module_installed :
365+ itt .resume ()
284366 t0 = timeit .default_timer ()
285367 val = func (* args , ** kwargs )
286368 t1 = timeit .default_timer ()
369+ if params .ittpy and is_the_target_stage and itt_module_installed :
370+ itt .pause ()
287371 times .append (t1 - t0 )
288372 if sum (times ) > time_limit :
289373 break
@@ -560,7 +644,9 @@ def print_output(library, algorithm, stages, params, functions,
560644 result ['algorithm_parameters' ]['init' ] = 'random'
561645 result ['algorithm_parameters' ].pop ('handle' , None )
562646 output .append (result )
647+ print ('# Intel(R) Extension for Scikit-learn case result:' )
563648 print (json .dumps (output , indent = 4 ))
649+ print ('# Intel(R) Extension for Scikit-learn case finished.' )
564650
565651
566652def run_with_context (params , function ):
0 commit comments