xref: /petsc/src/mat/tests/ex70.c (revision 117ef88edefbfc12e7c19efe87a19a2e1b0acd4f)
1 #include <petscmat.h>
2 
3 static char help[] = "Tests MatMatMult with MAT_REUSE_MATRIX and already allocated dense result.\n\n";
4 
5 static PetscErrorCode CheckLocal(Mat A, Mat B, PetscScalar *a, PetscScalar *b)
6 {
7   PetscErrorCode ierr;
8   PetscBool      wA = PETSC_FALSE, wB = PETSC_FALSE;
9 
10   PetscFunctionBegin;
11   if (a) {
12     const PetscScalar *Aa;
13     ierr = MatDenseGetArrayRead(A,&Aa);CHKERRQ(ierr);
14     wA   = (PetscBool)(a != Aa);
15     ierr = MatDenseRestoreArrayRead(A,&Aa);CHKERRQ(ierr);
16   }
17   if (b) {
18     const PetscScalar *Bb;
19     ierr = MatDenseGetArrayRead(B,&Bb);CHKERRQ(ierr);
20     wB   = (PetscBool)(b != Bb);
21     ierr = MatDenseRestoreArrayRead(B,&Bb);CHKERRQ(ierr);
22   }
23   if (wA || wB) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong array in first Mat? %d, Wrong array in second Mat? %d",wA,wB);
24   PetscFunctionReturn(0);
25 }
26 
27 int main(int argc,char **args)
28 {
29   Mat            X,B,A,T,T2;
30   PetscInt       m,n,k,M = 10,N = 10,K = 5, ldx = 3, ldb = 5;
31   const char     *deft = MATAIJ;
32   char           mattype[256];
33   PetscBool      flg,symm = PETSC_FALSE,testtt = PETSC_TRUE, testnest = PETSC_TRUE, testtranspose = PETSC_TRUE, testcircular = PETSC_FALSE, local = PETSC_TRUE;
34   PetscBool      xgpu = PETSC_FALSE,bgpu = PETSC_FALSE;
35   PetscScalar    *dataX = NULL,*dataB = NULL;
36   PetscScalar    *aX,*aB;
37   PetscReal      err;
38   PetscErrorCode ierr;
39 
40   ierr = PetscInitialize(&argc,&args,NULL,help);if (ierr) return ierr;
41   ierr = PetscOptionsGetInt(NULL,NULL,"-N",&N,NULL);CHKERRQ(ierr);
42   ierr = PetscOptionsGetInt(NULL,NULL,"-M",&M,NULL);CHKERRQ(ierr);
43   ierr = PetscOptionsGetInt(NULL,NULL,"-K",&K,NULL);CHKERRQ(ierr);
44   ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr);
45   ierr = PetscOptionsGetBool(NULL,NULL,"-local",&local,NULL);CHKERRQ(ierr);
46   ierr = PetscOptionsGetInt(NULL,NULL,"-ldx",&ldx,NULL);CHKERRQ(ierr);
47   ierr = PetscOptionsGetInt(NULL,NULL,"-ldb",&ldb,NULL);CHKERRQ(ierr);
48   ierr = PetscOptionsGetBool(NULL,NULL,"-testtranspose",&testtranspose,NULL);CHKERRQ(ierr);
49   ierr = PetscOptionsGetBool(NULL,NULL,"-testnest",&testnest,NULL);CHKERRQ(ierr);
50   ierr = PetscOptionsGetBool(NULL,NULL,"-testtt",&testtt,NULL);CHKERRQ(ierr);
51   ierr = PetscOptionsGetBool(NULL,NULL,"-testcircular",&testcircular,NULL);CHKERRQ(ierr);
52   ierr = PetscOptionsGetBool(NULL,NULL,"-xgpu",&xgpu,NULL);CHKERRQ(ierr);
53   ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr);
54 #if defined(PETSC_USE_COMPLEX)
55   testtranspose = PETSC_FALSE;
56   testtt = PETSC_FALSE;
57 #endif
58   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
59   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
60   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
61   ierr = MatSetUp(A);CHKERRQ(ierr);
62   ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
63   if (M==N && symm) {
64     Mat AT;
65 
66     ierr = MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
67     ierr = MatAXPY(A,1.0,AT,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
68     ierr = MatDestroy(&AT);CHKERRQ(ierr);
69     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
70   }
71   ierr = MatViewFromOptions(A,NULL,"-A_init_view");CHKERRQ(ierr);
72   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,"","","");CHKERRQ(ierr);
73   ierr = PetscOptionsFList("-A_mat_type","Matrix type","MatSetType",MatList,deft,mattype,256,&flg);CHKERRQ(ierr);
74   ierr = PetscOptionsEnd();CHKERRQ(ierr);
75   if (flg) {
76     Mat A2;
77 
78     /* MATSEQAIJCUSPARSE does not support MAT_INITIAL_MATRIX */
79     ierr = MatDuplicate(A,MAT_COPY_VALUES,&A2);CHKERRQ(ierr);
80     ierr = MatConvert(A,mattype,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
81     ierr = MatMultEqual(A,A2,10,&flg);CHKERRQ(ierr);
82     if (!flg) {
83       Mat AE,A2E;
84 
85       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with convert\n");CHKERRQ(ierr);
86       ierr = MatComputeOperator(A,MATDENSE,&AE);CHKERRQ(ierr);
87       ierr = MatComputeOperator(A2,MATDENSE,&A2E);CHKERRQ(ierr);
88       ierr = MatView(AE,NULL);CHKERRQ(ierr);
89       ierr = MatView(A2E,NULL);CHKERRQ(ierr);
90       ierr = MatAXPY(A2E,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
91       ierr = MatView(A2E,NULL);CHKERRQ(ierr);
92       ierr = MatDestroy(&A2E);CHKERRQ(ierr);
93       ierr = MatDestroy(&AE);CHKERRQ(ierr);
94     }
95     ierr = MatDestroy(&A2);CHKERRQ(ierr);
96   }
97   ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr);
98 
99   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
100   if (local) {
101     ierr = PetscMalloc1(PetscMax((m+ldx)*K,1),&dataX);CHKERRQ(ierr);
102     ierr = PetscMalloc1(PetscMax((n+ldb)*K,1),&dataB);CHKERRQ(ierr);
103   }
104   ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,K,dataB,&B);CHKERRQ(ierr);
105   ierr = MatCreateDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,M,K,dataX,&X);CHKERRQ(ierr);
106 
107   /* store pointer to dense data for testing */
108   ierr = MatDenseGetArrayRead(B,(const PetscScalar**)&dataB);CHKERRQ(ierr);
109   ierr = MatDenseGetArrayRead(X,(const PetscScalar**)&dataX);CHKERRQ(ierr);
110   aX   = dataX;
111   aB   = dataB;
112   ierr = MatDenseRestoreArrayRead(B,(const PetscScalar**)&dataB);CHKERRQ(ierr);
113   ierr = MatDenseRestoreArrayRead(X,(const PetscScalar**)&dataX);CHKERRQ(ierr);
114   if (local) {
115     dataX = aX;
116     dataB = aB;
117   }
118   ierr = MatSetRandom(B,NULL);CHKERRQ(ierr);
119   /* convert to CUDA if needed */
120   if (bgpu) {
121     ierr = MatConvert(B,MATDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
122   }
123   if (xgpu) {
124     ierr = MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
125     ierr = MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
126     ierr = MatConvert(X,MATDENSECUDA,MAT_INPLACE_MATRIX,&X);CHKERRQ(ierr);
127   }
128   ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
129 
130   /* Test reusing a previously allocated dense buffer */
131   ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
132   ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
133   ierr = MatMatMultEqual(A,B,X,10,&flg);CHKERRQ(ierr);
134   if (!flg) {
135     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage\n");CHKERRQ(ierr);
136     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
137     ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
138     ierr = MatView(T,NULL);CHKERRQ(ierr);
139     ierr = MatDestroy(&T);CHKERRQ(ierr);
140   }
141 
142   /* Test MatDenseGetColumnVec and friends */
143   ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
144   ierr = MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&T2);CHKERRQ(ierr);
145   for (k=0;k<K;k++) {
146     Vec Xv,Tv,T2v;
147 
148     ierr = MatDenseGetColumnVecRead(X,k,&Xv);CHKERRQ(ierr);
149     ierr = MatDenseGetColumnVec(T,k,&Tv);CHKERRQ(ierr);
150     ierr = MatDenseGetColumnVecWrite(T2,k,&T2v);CHKERRQ(ierr);
151     ierr = VecCopy(Xv,T2v);CHKERRQ(ierr);
152     ierr = VecAXPY(Tv,-1.,Xv);CHKERRQ(ierr);
153     ierr = MatDenseRestoreColumnVecRead(X,k,&Xv);CHKERRQ(ierr);
154     ierr = MatDenseRestoreColumnVec(T,k,&Tv);CHKERRQ(ierr);
155     ierr = MatDenseRestoreColumnVecWrite(T2,k,&T2v);CHKERRQ(ierr);
156   }
157   ierr = MatNorm(T,NORM_FROBENIUS,&err);CHKERRQ(ierr);
158   if (err > PETSC_SMALL) {
159     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVec\n");CHKERRQ(ierr);
160     ierr = MatView(T,NULL);CHKERRQ(ierr);
161   }
162   ierr = MatAXPY(T2,-1.,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
163   ierr = MatNorm(T2,NORM_FROBENIUS,&err);CHKERRQ(ierr);
164   if (err > PETSC_SMALL) {
165     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVecWrite\n");CHKERRQ(ierr);
166     ierr = MatView(T2,NULL);CHKERRQ(ierr);
167   }
168   ierr = MatDestroy(&T);CHKERRQ(ierr);
169   ierr = MatDestroy(&T2);CHKERRQ(ierr);
170 
171   /* Test with MatShell */
172   ierr = MatConvert(A,MATSHELL,MAT_INITIAL_MATRIX,&T2);CHKERRQ(ierr);
173   ierr = MatMatMult(T2,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
174   ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
175   ierr = MatMatMultEqual(T2,B,X,10,&flg);CHKERRQ(ierr);
176   if (!flg) {
177     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MATSHELL)\n");CHKERRQ(ierr);
178     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
179     ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
180     ierr = MatView(T,NULL);CHKERRQ(ierr);
181     ierr = MatDestroy(&T);CHKERRQ(ierr);
182   }
183 
184   if (testtranspose) {
185     ierr = MatTransposeMatMult(T2,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
186     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
187     ierr = MatTransposeMatMultEqual(T2,X,B,10,&flg);CHKERRQ(ierr);
188     if (!flg) {
189       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MatTranspose, MATSHELL)\n");CHKERRQ(ierr);
190       ierr = MatTransposeMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
191       ierr = MatAXPY(T,-1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
192       ierr = MatView(T,NULL);CHKERRQ(ierr);
193       ierr = MatDestroy(&T);CHKERRQ(ierr);
194     }
195   }
196   ierr = MatDestroy(&T2);CHKERRQ(ierr);
197 
198   if (testnest) { /* test with MatNest */
199     Mat        NA;
200     const char *vtype;
201 
202     ierr = MatCreateNest(PETSC_COMM_WORLD,1,NULL,1,NULL,&A,&NA);CHKERRQ(ierr);
203     /* needed to test against CUSPARSE matrices */
204     ierr = MatGetVecType(A,&vtype);CHKERRQ(ierr);
205     ierr = MatSetVecType(NA,vtype);CHKERRQ(ierr);
206     ierr = MatViewFromOptions(NA,NULL,"-NA_view");CHKERRQ(ierr);
207     ierr = MatMatMult(NA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
208     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
209     ierr = MatMatMultEqual(NA,B,X,10,&flg);CHKERRQ(ierr);
210     if (!flg) {
211       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Nest\n");CHKERRQ(ierr);
212       ierr = MatMatMult(NA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
213       ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
214       ierr = MatView(T,NULL);CHKERRQ(ierr);
215       ierr = MatDestroy(&T);CHKERRQ(ierr);
216     }
217     ierr = MatDestroy(&NA);CHKERRQ(ierr);
218   }
219 
220   if (testtranspose) { /* test with Transpose */
221     Mat TA;
222 
223     ierr = MatCreateHermitianTranspose(A,&TA);CHKERRQ(ierr);
224     ierr = MatMatMult(TA,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
225     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
226     ierr = MatMatMultEqual(TA,X,B,10,&flg);CHKERRQ(ierr);
227     if (!flg) {
228       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose\n");CHKERRQ(ierr);
229       ierr = MatMatMult(TA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
230       ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
231       ierr = MatView(T,NULL);CHKERRQ(ierr);
232       ierr = MatDestroy(&T);CHKERRQ(ierr);
233     }
234     ierr = MatDestroy(&TA);CHKERRQ(ierr);
235   }
236 
237   if (testtt) { /* test with Transpose(Transpose) */
238     Mat TA, TTA;
239 
240     ierr = MatCreateHermitianTranspose(A,&TA);CHKERRQ(ierr);
241     ierr = MatCreateHermitianTranspose(TA,&TTA);CHKERRQ(ierr);
242     ierr = MatMatMult(TTA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
243     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
244     ierr = MatMatMultEqual(TTA,B,X,10,&flg);CHKERRQ(ierr);
245     if (!flg) {
246       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose(Transpose)\n");CHKERRQ(ierr);
247       ierr = MatMatMult(TTA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
248       ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
249       ierr = MatView(T,NULL);CHKERRQ(ierr);
250       ierr = MatDestroy(&T);CHKERRQ(ierr);
251     }
252     ierr = MatDestroy(&TA);CHKERRQ(ierr);
253     ierr = MatDestroy(&TTA);CHKERRQ(ierr);
254   }
255 
256   if (testcircular) { /* test circular */
257     Mat AB;
258 
259     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AB);CHKERRQ(ierr);
260     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
261     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
262     if (M == N && N == K) {
263       ierr = MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
264     } else {
265       ierr = MatTransposeMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
266     }
267     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
268     ierr = MatDestroy(&AB);CHKERRQ(ierr);
269   }
270   ierr = MatDestroy(&X);CHKERRQ(ierr);
271   ierr = MatDestroy(&B);CHKERRQ(ierr);
272   ierr = MatDestroy(&A);CHKERRQ(ierr);
273   ierr = PetscFree(dataX);CHKERRQ(ierr);
274   ierr = PetscFree(dataB);CHKERRQ(ierr);
275   ierr = PetscFinalize();
276   return ierr;
277 }
278 
279 /*TEST
280 
281   test:
282     suffix: 1
283     args: -local {{0 1}}
284 
285   test:
286     output_file: output/ex70_1.out
287     requires: cuda
288     suffix: 1_cuda
289     args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{seqaijcusparse seqaij}} -testnest 0
290 
291   test:
292     TODO: VecGetSubVector seems broken with CUDA
293     output_file: output/ex70_1.out
294     requires: cuda
295     suffix: 1_cuda_broken
296     args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type seqaijcusparse -testnest
297 
298   test:
299     output_file: output/ex70_1.out
300     nsize: 2
301     suffix: 1_par
302     args: -testtranspose -local {{0 1}}
303 
304   test:
305     output_file: output/ex70_1.out
306     requires: cuda
307     nsize: 2
308     suffix: 1_par_cuda
309     args: -testtranspose 0 -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{mpiaijcusparse mpiaij}} -testnest 0
310 
311   test:
312     output_file: output/ex70_1.out
313     suffix: 2
314     nsize: 1
315     args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}}
316 
317   test:
318     requires: cuda
319     output_file: output/ex70_1.out
320     suffix: 2_cuda
321     nsize: 1
322     args: -M 7 -N 9 -K 2 -local {{0 1}} -testnest 0 -A_mat_type {{seqdensecuda seqdense}} -xgpu {{0 1}} -bgpu {{0 1}}
323 
324   test:
325     output_file: output/ex70_1.out
326     suffix: 2_par
327     nsize: 2
328     args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}} -testcircular
329 
330   test:
331     requires: cuda
332     output_file: output/ex70_1.out
333     suffix: 2_par_cuda
334     nsize: 2
335     args: -M 11 -N 9 -K 1 -local {{0 1}} -testcircular 0 -A_mat_type mpiaijcusparse -xgpu -bgpu -testnest 0
336 
337   test:
338     output_file: output/ex70_1.out
339     suffix: 3
340     nsize: {{1 3}}
341     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type sbaij -symm -testtranspose 0
342 
343   test:
344     output_file: output/ex70_1.out
345     suffix: 4
346     nsize: 1
347     args: -M 3 -N 3 -K 3 -local {{0 1}} -testcircular
348 
349   test:
350     output_file: output/ex70_1.out
351     suffix: 5
352     nsize: {{2 4}}
353     args: -M 3 -N 3 -K 3 -local 1 -testcircular -testtranspose 0
354 
355   test:
356     TODO: MatCreateDense broken with some processors not having local rows
357     output_file: output/ex70_1.out
358     suffix: 5_broken
359     nsize: {{2 4}}
360     args: -M 3 -N 3 -K 3 -local 0 -testcircular -testtranspose 0
361 
362   test:
363     output_file: output/ex70_1.out
364     suffix: 6
365     nsize: 1
366     args: -M {{1 3}} -N {{2 5}} -K {{1 2}} -local {{0 1}} -testcircular -testtranspose
367 
368   test:
369     output_file: output/ex70_1.out
370     suffix: 7
371     nsize: 1
372     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type dense -testnest 0 -testcircular
373 
374   test:
375     TODO: NEST x DENSE with dense nested matrices seems broken in this case
376     output_file: output/ex70_1.out
377     suffix: 7_broken
378     nsize: 1
379     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type dense -testnest -testcircular
380 
381 TEST*/
382