xref: /petsc/src/mat/tests/ex66.c (revision d21efd2e5d911db017a545648c4fa4838359bb2d)
1 static char help[] = "Tests MATH2OPUS\n\n";
2 
3 #include <petscmat.h>
4 #include <petscsf.h>
5 
6 static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
7 {
8     PetscInt  d;
9     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
10     PetscReal dist, diff = 0.0;
11 
12     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
13     dist = PetscSqrtReal(diff);
14     return PetscExpReal(-dist / clength);
15 }
16 
17 static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
18 {
19     PetscInt  d;
20     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
21     PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;
22 
23     for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; }
24     for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; }
25     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
26     dist = PetscSqrtReal(diff);
27     return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
28 }
29 
30 int main(int argc,char **argv)
31 {
32   Mat            A,B,C,D;
33   Vec            v,x,y,Ax,Ay,Bx,By;
34   PetscRandom    r;
35   PetscLayout    map;
36   PetscScalar    *Adata = NULL, *Cdata = NULL, scale = 1.0;
37   PetscReal      *coords,nA,nD,nB,err,nX,norms[3];
38   PetscInt       N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, ldu = 0, nlr = 7, nt, ntrials = 2;
39   PetscMPIInt    size,rank;
40   PetscBool      testlayout = PETSC_FALSE, flg, symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
41   PetscBool      checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE, flgglob;
42   PetscBool      testtrans, testnorm, randommat = PETSC_TRUE, testorthog, testcompress, testhlru;
43   void           (*approxnormfunc)(void);
44   void           (*Anormfunc)(void);
45   PetscErrorCode ierr;
46 
47 #if defined(PETSC_HAVE_MPI_INIT_THREAD)
48   PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
49 #endif
50   ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
51   ierr = PetscOptionsGetInt(NULL,NULL,"-ng",&N,&flgglob);CHKERRQ(ierr);
52   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
53   ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr);
54   ierr = PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);CHKERRQ(ierr);
55   ierr = PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);CHKERRQ(ierr);
56   ierr = PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);CHKERRQ(ierr);
57   ierr = PetscOptionsGetInt(NULL,NULL,"-nlr",&nlr,NULL);CHKERRQ(ierr);
58   ierr = PetscOptionsGetInt(NULL,NULL,"-ldu",&ldu,NULL);CHKERRQ(ierr);
59   ierr = PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);CHKERRQ(ierr);
60   ierr = PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);CHKERRQ(ierr);
61   if (!flgglob) { ierr = PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL);CHKERRQ(ierr); }
62   ierr = PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);CHKERRQ(ierr);
63   ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr);
64   ierr = PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);CHKERRQ(ierr);
65   ierr = PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);CHKERRQ(ierr);
66   ierr = PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);CHKERRQ(ierr);
67   ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr);
68   ierr = PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);CHKERRQ(ierr);
69   ierr = PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);CHKERRQ(ierr);
70   if (!Asymm) symm = PETSC_FALSE;
71 
72   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr);
73 
74   /* Disable tests for unimplemented variants */
75   testtrans = (PetscBool)(size == 1 || symm);
76   testnorm = (PetscBool)(size == 1 || symm);
77   testorthog = (PetscBool)(size == 1 || symm);
78   testcompress = (PetscBool)(size == 1 || symm);
79   testhlru = (PetscBool)(size == 1);
80 
81   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
82   ierr = PetscLayoutCreate(PETSC_COMM_WORLD,&map);CHKERRQ(ierr);
83   if (testlayout) {
84     if (rank%2) n = PetscMax(2*n-5*rank,0);
85     else n = 2*n+rank;
86   }
87   if (!flgglob) {
88     ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);
89     ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
90     ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
91   } else {
92     ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
93     ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
94     ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
95   }
96   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
97 
98   if (lda) {
99     ierr = PetscMalloc1(N*(n+lda),&Adata);CHKERRQ(ierr);
100   }
101   ierr = MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);CHKERRQ(ierr);
102   ierr = MatDenseSetLDA(A,n+lda);CHKERRQ(ierr);
103 
104   /* Create random points; these are replicated in order to populate a dense matrix and to compare sequential and dense runs
105      The constructor for MATH2OPUS can take as input the distributed coordinates and replicates them internally in case
106      a kernel construction is requested */
107   ierr = PetscRandomCreate(PETSC_COMM_WORLD,&r);CHKERRQ(ierr);
108   ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr);
109   ierr = PetscRandomSetSeed(r,123456);CHKERRQ(ierr);
110   ierr = PetscRandomSeed(r);CHKERRQ(ierr);
111   ierr = PetscMalloc1(N*dim,&coords);CHKERRQ(ierr);
112   ierr = PetscRandomGetValuesReal(r,N*dim,coords);CHKERRQ(ierr);
113   ierr = PetscRandomDestroy(&r);CHKERRQ(ierr);
114 
115   if (kernel || !randommat) {
116     MatH2OpusKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
117     PetscInt        ist,ien;
118 
119     ierr = MatGetOwnershipRange(A,&ist,&ien);CHKERRQ(ierr);
120     for (i = ist; i < ien; i++) {
121       for (j = 0; j < N; j++) {
122         ierr = MatSetValue(A,i,j,(*k)(dim,coords + i*dim,coords + j*dim,NULL),INSERT_VALUES);CHKERRQ(ierr);
123       }
124     }
125     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
126     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
127     if (kernel) {
128       ierr = MatCreateH2OpusFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords + ist*dim,PETSC_TRUE,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
129     } else {
130       ierr = MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
131     }
132   } else {
133     PetscInt ist;
134 
135     ierr = MatGetOwnershipRange(A,&ist,NULL);CHKERRQ(ierr);
136     ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
137     if (Asymm) {
138       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
139       ierr = MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
140       ierr = MatDestroy(&B);CHKERRQ(ierr);
141       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
142     }
143     ierr = MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
144   }
145   ierr = PetscFree(coords);CHKERRQ(ierr);
146   if (agpu) {
147     ierr = MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
148   }
149   ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr);
150 
151   ierr = MatSetOption(B,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
152 
153   /* assemble the H-matrix */
154   ierr = MatBindToCPU(B,(PetscBool)!bgpu);CHKERRQ(ierr);
155   ierr = MatSetFromOptions(B);CHKERRQ(ierr);
156   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
157   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
158   ierr = MatViewFromOptions(B,NULL,"-B_view");CHKERRQ(ierr);
159 
160   /* Test MatScale */
161   ierr = MatScale(A,scale);CHKERRQ(ierr);
162   ierr = MatScale(B,scale);CHKERRQ(ierr);
163 
164   /* Test MatMult */
165   ierr = MatCreateVecs(A,&Ax,&Ay);CHKERRQ(ierr);
166   ierr = MatCreateVecs(B,&Bx,&By);CHKERRQ(ierr);
167   ierr = VecSetRandom(Ax,NULL);CHKERRQ(ierr);
168   ierr = VecCopy(Ax,Bx);CHKERRQ(ierr);
169   ierr = MatMult(A,Ax,Ay);CHKERRQ(ierr);
170   ierr = MatMult(B,Bx,By);CHKERRQ(ierr);
171   ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr);
172   ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr);
173   ierr = VecNorm(Ay,NORM_INFINITY,&nX);CHKERRQ(ierr);
174   ierr = VecAXPY(Ay,-1.0,By);CHKERRQ(ierr);
175   ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr);
176   ierr = VecNorm(Ay,NORM_INFINITY,&err);CHKERRQ(ierr);
177   ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);CHKERRQ(ierr);
178   ierr = VecScale(By,-1.0);CHKERRQ(ierr);
179   ierr = MatMultAdd(B,Bx,By,By);CHKERRQ(ierr);
180   ierr = VecNorm(By,NORM_INFINITY,&err);CHKERRQ(ierr);
181   ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr);
182   if (err > 10.*PETSC_SMALL) {
183     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);CHKERRQ(ierr);
184   }
185 
186   /* Test MatNorm */
187   ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
188   ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr);
189   norms[2] = -1.; /* NORM_2 not supported */
190   ierr = PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms:        infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
191   ierr = MatGetOperation(A,MATOP_NORM,&Anormfunc);CHKERRQ(ierr);
192   ierr = MatGetOperation(B,MATOP_NORM,&approxnormfunc);CHKERRQ(ierr);
193   ierr = MatSetOperation(A,MATOP_NORM,approxnormfunc);CHKERRQ(ierr);
194   ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
195   ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr);
196   ierr = MatNorm(A,NORM_2,&norms[2]);CHKERRQ(ierr);
197   ierr = PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
198   if (testnorm) {
199     ierr = MatNorm(B,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
200     ierr = MatNorm(B,NORM_1,&norms[1]);CHKERRQ(ierr);
201     ierr = MatNorm(B,NORM_2,&norms[2]);CHKERRQ(ierr);
202   } else {
203     norms[0] = -1.;
204     norms[1] = -1.;
205     norms[2] = -1.;
206   }
207   ierr = PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
208   ierr = MatSetOperation(A,MATOP_NORM,Anormfunc);CHKERRQ(ierr);
209 
210   /* Test MatDuplicate */
211   ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
212   ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
213   ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr);
214   if (!flg) {
215     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");CHKERRQ(ierr);
216   }
217   if (testtrans) {
218     ierr = MatMultTransposeEqual(B,D,10,&flg);CHKERRQ(ierr);
219     if (!flg) {
220       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose error after MatDuplicate\n");CHKERRQ(ierr);
221     }
222   }
223   ierr = MatDestroy(&D);CHKERRQ(ierr);
224 
225   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
226     ierr = VecSetRandom(Ay,NULL);CHKERRQ(ierr);
227     ierr = VecCopy(Ay,By);CHKERRQ(ierr);
228     ierr = MatMultTranspose(A,Ay,Ax);CHKERRQ(ierr);
229     ierr = MatMultTranspose(B,By,Bx);CHKERRQ(ierr);
230     ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
231     ierr = VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
232     ierr = VecNorm(Ax,NORM_INFINITY,&nX);CHKERRQ(ierr);
233     ierr = VecAXPY(Ax,-1.0,Bx);CHKERRQ(ierr);
234     ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
235     ierr = VecNorm(Ax,NORM_INFINITY,&err);CHKERRQ(ierr);
236     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);CHKERRQ(ierr);
237     ierr = VecScale(Bx,-1.0);CHKERRQ(ierr);
238     ierr = MatMultTransposeAdd(B,By,Bx,Bx);CHKERRQ(ierr);
239     ierr = VecNorm(Bx,NORM_INFINITY,&err);CHKERRQ(ierr);
240     if (err > 10.*PETSC_SMALL) {
241       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);CHKERRQ(ierr);
242     }
243   }
244   ierr = VecDestroy(&Ax);CHKERRQ(ierr);
245   ierr = VecDestroy(&Ay);CHKERRQ(ierr);
246   ierr = VecDestroy(&Bx);CHKERRQ(ierr);
247   ierr = VecDestroy(&By);CHKERRQ(ierr);
248 
249   /* Test MatMatMult */
250   if (ldc) {
251     ierr = PetscMalloc1(nrhs*(n+ldc),&Cdata);CHKERRQ(ierr);
252   }
253   ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);CHKERRQ(ierr);
254   ierr = MatDenseSetLDA(C,n+ldc);CHKERRQ(ierr);
255   ierr = MatSetRandom(C,NULL);CHKERRQ(ierr);
256   if (cgpu) {
257     ierr = MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);CHKERRQ(ierr);
258   }
259   for (nt = 0; nt < ntrials; nt++) {
260     ierr = MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr);
261     ierr = MatViewFromOptions(D,NULL,"-bc_view");CHKERRQ(ierr);
262     ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr);
263     if (flg) {
264       ierr = MatCreateVecs(B,&x,&y);CHKERRQ(ierr);
265       ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr);
266       for (i = 0; i < nrhs; i++) {
267         ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr);
268         ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr);
269         ierr = MatMult(B,x,y);CHKERRQ(ierr);
270         ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr);
271         ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr);
272         if (err > 10.*PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMat err %" PetscInt_FMT " %g\n",i,err);CHKERRQ(ierr); }
273       }
274       ierr = VecDestroy(&y);CHKERRQ(ierr);
275       ierr = VecDestroy(&x);CHKERRQ(ierr);
276       ierr = VecDestroy(&v);CHKERRQ(ierr);
277     }
278   }
279   ierr = MatDestroy(&D);CHKERRQ(ierr);
280 
281   /* Test MatTransposeMatMult */
282   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
283     for (nt = 0; nt < ntrials; nt++) {
284       ierr = MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr);
285       ierr = MatViewFromOptions(D,NULL,"-btc_view");CHKERRQ(ierr);
286       ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr);
287       if (flg) {
288         ierr = MatCreateVecs(B,&y,&x);CHKERRQ(ierr);
289         ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr);
290         for (i = 0; i < nrhs; i++) {
291           ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr);
292           ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr);
293           ierr = MatMultTranspose(B,x,y);CHKERRQ(ierr);
294           ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr);
295           ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr);
296           if (err > 10.*PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %" PetscInt_FMT " %g\n",i,err);CHKERRQ(ierr); }
297         }
298         ierr = VecDestroy(&y);CHKERRQ(ierr);
299         ierr = VecDestroy(&x);CHKERRQ(ierr);
300         ierr = VecDestroy(&v);CHKERRQ(ierr);
301       }
302     }
303     ierr = MatDestroy(&D);CHKERRQ(ierr);
304   }
305 
306   /* Test basis orthogonalization */
307   if (testorthog) {
308     ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
309     ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
310     ierr = MatH2OpusOrthogonalize(D);CHKERRQ(ierr);
311     ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr);
312     if (!flg) {
313       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after basis ortogonalization\n");CHKERRQ(ierr);
314     }
315     ierr = MatDestroy(&D);CHKERRQ(ierr);
316   }
317 
318   /* Test matrix compression */
319   if (testcompress) {
320     ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
321     ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
322     ierr = MatH2OpusCompress(D,PETSC_SMALL);CHKERRQ(ierr);
323     ierr = MatDestroy(&D);CHKERRQ(ierr);
324   }
325 
326   /* Test low-rank update */
327   if (testhlru) {
328     Mat         U, V;
329     PetscScalar *Udata = NULL, *Vdata = NULL;
330 
331     if (ldu) {
332       ierr = PetscMalloc1(nlr*(n+ldu),&Udata);CHKERRQ(ierr);
333       ierr = PetscMalloc1(nlr*(n+ldu+2),&Vdata);CHKERRQ(ierr);
334     }
335     ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
336     ierr = MatCreateDense(PetscObjectComm((PetscObject)D),n,PETSC_DECIDE,N,nlr,Udata,&U);CHKERRQ(ierr);
337     ierr = MatDenseSetLDA(U,n+ldu);CHKERRQ(ierr);
338     ierr = MatCreateDense(PetscObjectComm((PetscObject)D),n,PETSC_DECIDE,N,nlr,Vdata,&V);CHKERRQ(ierr);
339     if (ldu) { ierr = MatDenseSetLDA(V,n+ldu+2);CHKERRQ(ierr); }
340     ierr = MatSetRandom(U,NULL);CHKERRQ(ierr);
341     ierr = MatSetRandom(V,NULL);CHKERRQ(ierr);
342     ierr = MatH2OpusLowRankUpdate(D,U,V,0.5);CHKERRQ(ierr);
343     ierr = MatH2OpusLowRankUpdate(D,U,V,-0.5);CHKERRQ(ierr);
344     ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr);
345     if (!flg) {
346       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after low-rank update\n");CHKERRQ(ierr);
347     }
348     ierr = MatDestroy(&D);CHKERRQ(ierr);
349     ierr = MatDestroy(&U);CHKERRQ(ierr);
350     ierr = PetscFree(Udata);CHKERRQ(ierr);
351     ierr = MatDestroy(&V);CHKERRQ(ierr);
352     ierr = PetscFree(Vdata);CHKERRQ(ierr);
353   }
354 
355   /* check explicit operator */
356   if (checkexpl) {
357     Mat Be, Bet;
358 
359     ierr = MatComputeOperator(B,MATDENSE,&D);CHKERRQ(ierr);
360     ierr = MatDuplicate(D,MAT_COPY_VALUES,&Be);CHKERRQ(ierr);
361     ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
362     ierr = MatViewFromOptions(D,NULL,"-expl_view");CHKERRQ(ierr);
363     ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
364     ierr = MatViewFromOptions(D,NULL,"-diff_view");CHKERRQ(ierr);
365     ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr);
366     ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr);
367     ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr);
368     ierr = MatDestroy(&D);CHKERRQ(ierr);
369 
370     if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
371       ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
372       ierr = MatComputeOperatorTranspose(B,MATDENSE,&D);CHKERRQ(ierr);
373       ierr = MatDuplicate(D,MAT_COPY_VALUES,&Bet);CHKERRQ(ierr);
374       ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
375       ierr = MatViewFromOptions(D,NULL,"-expl_trans_view");CHKERRQ(ierr);
376       ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
377       ierr = MatViewFromOptions(D,NULL,"-diff_trans_view");CHKERRQ(ierr);
378       ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr);
379       ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr);
380       ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr);
381       ierr = MatDestroy(&D);CHKERRQ(ierr);
382 
383       ierr = MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);CHKERRQ(ierr);
384       ierr = MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
385       ierr = MatViewFromOptions(Be,NULL,"-diff_expl_view");CHKERRQ(ierr);
386       ierr = MatNorm(Be,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
387       ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);CHKERRQ(ierr);
388       ierr = MatDestroy(&Be);CHKERRQ(ierr);
389       ierr = MatDestroy(&Bet);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatDestroy(&A);CHKERRQ(ierr);
393   ierr = MatDestroy(&B);CHKERRQ(ierr);
394   ierr = MatDestroy(&C);CHKERRQ(ierr);
395   ierr = PetscFree(Cdata);CHKERRQ(ierr);
396   ierr = PetscFree(Adata);CHKERRQ(ierr);
397   ierr = PetscFinalize();
398   return ierr;
399 }
400 
401 /*TEST
402 
403    build:
404      requires: h2opus
405 
406 #tests from kernel
407    test:
408      requires: h2opus
409      nsize: 1
410      suffix: 1
411      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0
412 
413    test:
414      requires: h2opus
415      nsize: 1
416      suffix: 1_ld
417      output_file: output/ex66_1.out
418      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -ldu 17 -symm 0 -checkexpl -bgpu 0
419 
420    test:
421      requires: h2opus cuda
422      nsize: 1
423      suffix: 1_cuda
424      output_file: output/ex66_1.out
425      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1
426 
427    test:
428      requires: h2opus cuda
429      nsize: 1
430      suffix: 1_cuda_ld
431      output_file: output/ex66_1.out
432      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -ldu 17 -symm 0 -checkexpl -bgpu 1
433 
434    test:
435      requires: h2opus
436      nsize: {{2 3}}
437      suffix: 1_par
438      args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0
439 
440    test:
441      requires: h2opus cuda
442      nsize: {{2 3}}
443      suffix: 1_par_cuda
444      args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
445      output_file: output/ex66_1_par.out
446 
447 #tests from matrix sampling (parallel or unsymmetric not supported)
448    test:
449      requires: h2opus
450      nsize: 1
451      suffix: 2
452      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0
453 
454    test:
455      requires: h2opus cuda
456      nsize: 1
457      suffix: 2_cuda
458      output_file: output/ex66_2.out
459      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}
460 
461 #tests view operation
462    test:
463      requires: h2opus !cuda
464      filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
465      nsize: {{1 2 3}}
466      suffix: view
467      args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
468 
469    test:
470      requires: h2opus cuda
471      filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
472      nsize: {{1 2 3}}
473      suffix: view_cuda
474      args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -bgpu -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
475 
476 TEST*/
477