xref: /petsc/src/mat/tests/ex226.c (revision 2f613bf53f46f9356e00a2ca2bd69453be72fc31)
1 static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2 
3 #include <petscmat.h>
4 
5 /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6 int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; }
7 
8 int main(int argc,char **argv)
9 {
10   Mat            A,B,C,PtAP,PtAP_copy,PtAP_squared;
11   PetscInt       i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1;
12   PetscScalar    v;
13   PetscErrorCode ierr;
14   PetscBool      equal=PETSC_FALSE,mat_view=PETSC_FALSE;
15   char           stencil[PETSC_MAX_PATH_LEN];
16 #if defined(PETSC_USE_LOG)
17   PetscLogStage  fullMatMatMultStage;
18 #endif
19 
20   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
21   ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr);
22   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
23   ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr);
24   ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr);
25   ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL);CHKERRQ(ierr);
26 
27   /* Create a aij matrix A */
28   M    = N = m*n*o;
29   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
30   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
31   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
32   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
33 
34   /* Consistency checks */
35   if (o < 1 || m < 1 || n < 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!");
36 
37   /************ 2D stencils ***************/
38   ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr);
39   if (equal) {   /* 5-point stencil, 2D */
40     ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
41     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
42     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
43     for (Ii=Istart; Ii<Iend; Ii++) {
44       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
45       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
46       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
47       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
48       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
49       v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
50     }
51   }
52   ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr);
53   if (equal) {      /* 9-point stencil, 2D */
54     ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
55     ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
56     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
57     for (Ii=Istart; Ii<Iend; Ii++) {
58       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
59       if (i>0)            {J = global_index(i-1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
60       if (i>0 && j>0)   {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
61       if (j>0)            {J = global_index(i,  j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
62       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
63       if (i<m-1)          {J = global_index(i+1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
64       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
65       if (j<n-1)          {J = global_index(i,  j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
66       if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
67       v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
68     }
69   }
70   ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr);
71   if (equal) {      /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
72     ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
73     ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
74     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
75     for (Ii=Istart; Ii<Iend; Ii++) {
76       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
77       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
78       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
79       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
80       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
81       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
82       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
83       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
84       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
85       v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
86     }
87   }
88   ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr);
89   if (equal) {      /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
90     ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
91     ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
92     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
93     for (Ii=Istart; Ii<Iend; Ii++) {
94       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
95       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
96       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
97       if (i>2)   {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
98       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
99       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
100       if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
101       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
102       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
103       if (j>2)   {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
104       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
105       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
106       if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
107       v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
108     }
109   }
110   /************ 3D stencils ***************/
111   ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr);
112   if (equal) {      /* 7-point stencil, 3D */
113     ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr);
114     ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr);
115     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
116     for (Ii=Istart; Ii<Iend; Ii++) {
117       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
118       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
119       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
120       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
121       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
122       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
123       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
124       v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
125     }
126   }
127   ierr = PetscStrcmp(stencil, "3d13point", &equal);CHKERRQ(ierr);
128   if (equal) {      /* 13-point stencil, 3D */
129     ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
130     ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
131     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
132     for (Ii=Istart; Ii<Iend; Ii++) {
133       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
134       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
135       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
136       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
137       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
138       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
139       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
140       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
141       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
142       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
143       if (k>1)   {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
144       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
145       if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
146       v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
147     }
148   }
149   ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr);
150   if (equal) {      /* 19-point stencil, 3D */
151     ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr);
152     ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr);
153     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
154     for (Ii=Istart; Ii<Iend; Ii++) {
155       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
156       /* one hop */
157       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
158       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
159       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
160       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
161       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
162       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
163       /* two hops */
164       if (i>0   && j>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
165       if (i>0   && k>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
166       if (i>0   && j<n-1) {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
167       if (i>0   && k<o-1) {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
168       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
169       if (i<m-1 && k>0)   {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
170       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
171       if (i<m-1 && k<o-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
172       if (j>0   && k>0)   {J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
173       if (j>0   && k<o-1) {J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
174       if (j<n-1 && k>0)   {J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
175       if (j<n-1 && k<o-1) {J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
176       v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
177     }
178   }
179   ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr);
180   if (equal) {      /* 27-point stencil, 3D */
181     ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr);
182     ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr);
183     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
184     for (Ii=Istart; Ii<Iend; Ii++) {
185       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
186       if (k>0) {
187         if (j>0) {
188           if (i>0)   {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
189                       J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
190           if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
191         }
192         {
193           if (i>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
194                       J = global_index(i,  j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
195           if (i<m-1) {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
196         }
197         if (j<n-1) {
198           if (i>0)   {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
199                       J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
200           if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
201         }
202       }
203       {
204         if (j>0) {
205           if (i>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
206                       J = global_index(i,  j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
207           if (i<m-1) {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
208         }
209         {
210           if (i>0)   {J = global_index(i-1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
211                       J = global_index(i,  j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
212           if (i<m-1) {J = global_index(i+1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
213         }
214         if (j<n-1) {
215           if (i>0)   {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
216                       J = global_index(i,  j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
217           if (i<m-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
218         }
219       }
220       if (k<o-1) {
221         if (j>0) {
222           if (i>0)   {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
223                       J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
224           if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
225         }
226         {
227           if (i>0)   {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
228                       J = global_index(i,  j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
229           if (i<m-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
230         }
231         if (j<n-1) {
232           if (i>0)   {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
233                       J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
234           if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
235         }
236       }
237       v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
238     }
239   }
240   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
241   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
242 
243   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
244   ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
245 
246   ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr);
247 
248   /* Test C = A*B */
249   ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr);
250   ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
251 
252   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
253   ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr);
254   ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr);
255   ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr);
256 
257   ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
258   ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
259 
260   ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr);
261   ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr);
262   ierr = MatDestroy(&PtAP);CHKERRQ(ierr);
263   ierr = MatDestroy(&C);CHKERRQ(ierr);
264   ierr = MatDestroy(&B);CHKERRQ(ierr);
265   ierr = MatDestroy(&A);CHKERRQ(ierr);
266   ierr = PetscFinalize();
267   return ierr;
268 }
269 
270 /*TEST
271 
272  test:
273       suffix: 1
274       nsize: 1
275       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
276 
277  test:
278        suffix: 2
279        nsize: 1
280        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
281 
282  test:
283       suffix: 3
284       nsize: 4
285       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
286 
287 TEST*/
288