xref: /petsc/src/binding/petsc4py/demo/regressor/test_regressor_synthetic.py (revision c12c126234ed623246a63bfa78c9f75a3aa00323)
1# Linear regression with synthetic data generation
2# ================================================
3#
4# Generate a synthetic data set using sklearn.datasets.make_regression() and
5# then solve it using the petsc4py interface to PetscRegressor.
6
7import numpy as np
8# Needed for plotting
9import matplotlib.colors
10import matplotlib.pyplot as plt
11
12# Needed for generating classification, regression and clustering datasets
13import sklearn.datasets as dt
14
15# Needed for evaluating the quality of the regression for the test data set
16from sklearn.metrics import mean_squared_error
17
18import argparse
19parser = argparse.ArgumentParser('Test MLG using synthetic data')
20parser.add_argument('--nsample', type=int, default=10000)
21parser.add_argument('--nfeature', type=int, default=10)
22parser.add_argument('--add_noise', action='store_true')
23args, unknown = parser.parse_known_args()
24
25import sys
26import petsc4py
27sys.argv = [sys.argv[0]] + unknown
28petsc4py.init(sys.argv)
29from petsc4py import PETSc
30
31# Define the seed so that results can be reproduced
32seed = 11
33rand_state = 11
34
35# Define the color maps for plots
36color_map = plt.cm.get_cmap('RdYlBu')
37color_map_discrete = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red","cyan","magenta","blue"])
38
39def petsc_regression_test(nsample, nfeature, noise=0, rand_state=11):
40  ntr = round(nsample*9/10)
41  nte = nsample - ntr
42  x,y = dt.make_regression(n_samples=nsample,
43                           n_features=nfeature,
44                           noise=noise,
45                           random_state=rand_state)
46  x_train, y_train = x[:ntr,], y[:ntr]
47  xte, yte = x[ntr:,], y[ntr:]
48  comm = PETSc.COMM_WORLD
49  rank = comm.getRank()
50  regressor = PETSc.Regressor().create(comm=comm)
51  regressor.setType(PETSc.Regressor.Type.LINEAR)
52  rows_ix = np.arange(ntr,dtype=np.int32)
53  cols_ix = np.arange(nfeature,dtype=np.int32)
54  X = PETSc.Mat().create(comm=comm)
55  X.setSizes((ntr,nfeature))
56  X.setFromOptions()
57  X.setUp()
58  y = PETSc.Vec().create(comm=comm)
59  y.setSizes(ntr)
60  y.setFromOptions()
61  if not rank :
62    X.setValues(rows_ix,cols_ix,x_train,addv=True)
63    y.setValues(rows_ix,y_train,addv=False)
64  X.assemblyBegin(X.AssemblyType.FINAL)
65  X.assemblyEnd(X.AssemblyType.FINAL)
66  y.assemblyBegin()
67  y.assemblyEnd()
68  regressor.fit(X,y)
69  rows_ix = np.arange(nte,dtype=np.int32)
70  X = PETSc.Mat().create(comm=comm)
71  X.setSizes((nte,nfeature))
72  X.setFromOptions()
73  X.setUp()
74  y = PETSc.Vec().create(comm=comm)
75  y.setSizes(nte)
76  y.setFromOptions()
77  if not rank :
78    X.setValues(rows_ix,cols_ix,xte,addv=True)
79    y.zeroEntries()
80  X.assemblyBegin(X.AssemblyType.FINAL)
81  X.assemblyEnd(X.AssemblyType.FINAL)
82  y.assemblyBegin()
83  y.assemblyEnd()
84  regressor.predict(X,y)
85  ypr = y.getArray()
86  error = mean_squared_error(ypr,yte)
87  print(f"Test MSE: {error:f}")
88  return xte,ypr
89
90xte,ypr = petsc_regression_test(args.nsample, args.nfeature, 0)
91
92if args.add_noise:
93  fig,ax = plt.subplots(nrows=2, ncols=3,figsize=(16,7))
94  plt_ind_list = np.arange(6)+231
95
96  for noise,plt_ind in zip([0,0.1,1,10,100,1000],plt_ind_list):
97    xte,ypr = petsc_regression_test(args.nsample, args.nfeature, noise, rand_state)
98
99    plt.subplot(plt_ind)
100    my_scatter_plot = plt.scatter(xte[:,0],
101                                  xte[:,1],
102                                  c=ypr,
103                                  vmin=min(ypr),
104                                  vmax=max(ypr),
105                                  s=35,
106                                  cmap=color_map)
107
108    plt.title('noise: '+str(noise))
109    plt.colorbar(my_scatter_plot)
110
111  fig.subplots_adjust(hspace=0.3,wspace=.3)
112  plt.suptitle('PETSc regression tests with different noise levels',fontsize=20)
113  plt.show(block=True)
114