1# Linear regression with synthetic data generation 2# ================================================ 3# 4# Generate a synthetic data set using sklearn.datasets.make_regression() and 5# then solve it using the petsc4py interface to PetscRegressor. 6 7import numpy as np 8# Needed for plotting 9import matplotlib.colors 10import matplotlib.pyplot as plt 11 12# Needed for generating classification, regression and clustering datasets 13import sklearn.datasets as dt 14 15# Needed for evaluating the quality of the regression for the test data set 16from sklearn.metrics import mean_squared_error 17 18import argparse 19parser = argparse.ArgumentParser('Test MLG using synthetic data') 20parser.add_argument('--nsample', type=int, default=10000) 21parser.add_argument('--nfeature', type=int, default=10) 22parser.add_argument('--add_noise', action='store_true') 23args, unknown = parser.parse_known_args() 24 25import sys 26import petsc4py 27sys.argv = [sys.argv[0]] + unknown 28petsc4py.init(sys.argv) 29from petsc4py import PETSc 30 31# Define the seed so that results can be reproduced 32seed = 11 33rand_state = 11 34 35# Define the color maps for plots 36color_map = plt.cm.get_cmap('RdYlBu') 37color_map_discrete = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red","cyan","magenta","blue"]) 38 39def petsc_regression_test(nsample, nfeature, noise=0, rand_state=11): 40 ntr = round(nsample*9/10) 41 nte = nsample - ntr 42 x,y = dt.make_regression(n_samples=nsample, 43 n_features=nfeature, 44 noise=noise, 45 random_state=rand_state) 46 x_train, y_train = x[:ntr,], y[:ntr] 47 xte, yte = x[ntr:,], y[ntr:] 48 comm = PETSc.COMM_WORLD 49 rank = comm.getRank() 50 regressor = PETSc.Regressor().create(comm=comm) 51 regressor.setType(PETSc.Regressor.Type.LINEAR) 52 rows_ix = np.arange(ntr,dtype=np.int32) 53 cols_ix = np.arange(nfeature,dtype=np.int32) 54 X = PETSc.Mat().create(comm=comm) 55 X.setSizes((ntr,nfeature)) 56 X.setFromOptions() 57 X.setUp() 58 y = PETSc.Vec().create(comm=comm) 59 y.setSizes(ntr) 60 y.setFromOptions() 61 if not rank : 62 X.setValues(rows_ix,cols_ix,x_train,addv=True) 63 y.setValues(rows_ix,y_train,addv=False) 64 X.assemblyBegin(X.AssemblyType.FINAL) 65 X.assemblyEnd(X.AssemblyType.FINAL) 66 y.assemblyBegin() 67 y.assemblyEnd() 68 regressor.fit(X,y) 69 rows_ix = np.arange(nte,dtype=np.int32) 70 X = PETSc.Mat().create(comm=comm) 71 X.setSizes((nte,nfeature)) 72 X.setFromOptions() 73 X.setUp() 74 y = PETSc.Vec().create(comm=comm) 75 y.setSizes(nte) 76 y.setFromOptions() 77 if not rank : 78 X.setValues(rows_ix,cols_ix,xte,addv=True) 79 y.zeroEntries() 80 X.assemblyBegin(X.AssemblyType.FINAL) 81 X.assemblyEnd(X.AssemblyType.FINAL) 82 y.assemblyBegin() 83 y.assemblyEnd() 84 regressor.predict(X,y) 85 ypr = y.getArray() 86 error = mean_squared_error(ypr,yte) 87 print(f"Test MSE: {error:f}") 88 return xte,ypr 89 90xte,ypr = petsc_regression_test(args.nsample, args.nfeature, 0) 91 92if args.add_noise: 93 fig,ax = plt.subplots(nrows=2, ncols=3,figsize=(16,7)) 94 plt_ind_list = np.arange(6)+231 95 96 for noise,plt_ind in zip([0,0.1,1,10,100,1000],plt_ind_list): 97 xte,ypr = petsc_regression_test(args.nsample, args.nfeature, noise, rand_state) 98 99 plt.subplot(plt_ind) 100 my_scatter_plot = plt.scatter(xte[:,0], 101 xte[:,1], 102 c=ypr, 103 vmin=min(ypr), 104 vmax=max(ypr), 105 s=35, 106 cmap=color_map) 107 108 plt.title('noise: '+str(noise)) 109 plt.colorbar(my_scatter_plot) 110 111 fig.subplots_adjust(hspace=0.3,wspace=.3) 112 plt.suptitle('PETSc regression tests with different noise levels',fontsize=20) 113 plt.show(block=True) 114