xref: /petsc/src/mat/impls/sell/seq/sell.c (revision 5e71baeff2f3138f93cd4f5927dfd596eb8325cc)
1 
2 /*
3   Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H)
9 #include <immintrin.h>
10 
11 #if !defined(_MM_SCALE_8)
12 #define _MM_SCALE_8    8
13 #endif
14 
15 #if defined(__AVX512F__)
16 /* these do not work
17  vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
18  vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
19 */
20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
22 vec_idx  = _mm256_load_si256((__m256i const*)acolidx); \
23 vec_vals = _mm512_load_pd(aval); \
24 vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
25 vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y);
26 
27 #endif
28 #endif  /* PETSC_HAVE_IMMINTRIN_H */
29 
30 /*@C
31  MatSeqSELLSetPreallocation - For good matrix assembly performance
32  the user should preallocate the matrix storage by setting the parameter nz
33  (or the array nnz).  By setting these parameters accurately, performance
34  during matrix assembly can be increased significantly.
35 
36  Collective on MPI_Comm
37 
38  Input Parameters:
39  +  B - The matrix
40  .  nz - number of nonzeros per row (same for all rows)
41  -  nnz - array containing the number of nonzeros in the various rows
42  (possibly different for each row) or NULL
43 
44  Notes:
45  If nnz is given then nz is ignored.
46 
47  Specify the preallocated storage with either nz or nnz (not both).
48  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
49  allocation.  For large problems you MUST preallocate memory or you
50  will get TERRIBLE performance, see the users' manual chapter on matrices.
51 
52  You can call MatGetInfo() to get information on how effective the preallocation was;
53  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
54  You can also run with the option -info and look for messages with the string
55  malloc in them to see if additional memory allocation was needed.
56 
57  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
58  entries or columns indices.
59 
60  The maximum number of nonzeos in any row should be as accuate as possible.
61  If it is underesitmated, you will get bad performance due to reallocation
62  (MatSeqXSELLReallocateSELL).
63 
64  Level: intermediate
65 
66  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
67 
68  @*/
69 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
70 {
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
75   PetscValidType(B,1);
76   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
81 {
82   Mat_SeqSELL    *b;
83   PetscInt       i,j,totalslices;
84   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
89   if (maxallocrow == MAT_SKIP_ALLOCATION) {
90     skipallocation = PETSC_TRUE;
91     maxallocrow    = 0;
92   }
93 
94   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
95   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
96 
97   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
98   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
99   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
100   if (rlen) {
101     for (i=0; i<B->rmap->n; i++) {
102       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
103       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
104     }
105   }
106 
107   B->preallocated = PETSC_TRUE;
108 
109   b = (Mat_SeqSELL*)B->data;
110 
111   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
112   b->totalslices = totalslices;
113   if (!skipallocation) {
114     if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);
115 
116     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
117       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
118       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
119     }
120     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
121       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
122       else if (maxallocrow < 0) maxallocrow = 1;
123       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
124     } else {
125       maxallocrow = 0;
126       b->sliidx[0] = 0;
127       for (i=1; i<totalslices; i++) {
128         b->sliidx[i] = 0;
129         for (j=0;j<8;j++) {
130           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
131         }
132         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
133         b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
134       }
135       /* last slice */
136       b->sliidx[totalslices] = 0;
137       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
138       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
139       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
140     }
141 
142     /* allocate space for val, colidx, rlen */
143     /* FIXME: should B's old memory be unlogged? */
144     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
145     /* FIXME: assuming an element of the bit array takes 8 bits */
146     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
147     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
148     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
149     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
150     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
151 
152     b->singlemalloc = PETSC_TRUE;
153     b->free_val     = PETSC_TRUE;
154     b->free_colidx  = PETSC_TRUE;
155   } else {
156     b->free_val    = PETSC_FALSE;
157     b->free_colidx = PETSC_FALSE;
158   }
159 
160   b->nz               = 0;
161   b->maxallocrow      = maxallocrow;
162   b->rlenmax          = maxallocrow;
163   b->maxallocmat      = b->sliidx[totalslices];
164   B->info.nz_unneeded = (double)b->maxallocmat;
165   if (realalloc) {
166     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
172 {
173   Mat            B;
174   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data;
175   PetscInt       i,j,row;
176   PetscBool      isnonzero;
177   PetscErrorCode ierr;
178 
179   PetscFunctionBegin;
180   if (reuse == MAT_REUSE_MATRIX) {
181     B = *newmat;
182   } else {
183     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
184     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
185     ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
186     ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
187     ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
188   }
189 
190   for (i=0; i<a->totalslices; i++) { /* loop over slices */
191     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
192       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
193       if (isnonzero) {
194         ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr);
195       }
196     }
197   }
198   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
199   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
200   B->rmap->bs = A->rmap->bs;
201 
202   if (reuse == MAT_INPLACE_MATRIX) {
203     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
204   } else {
205     *newmat = B;
206   }
207   PetscFunctionReturn(0);
208 }
209 
210 #include <../src/mat/impls/aij/seq/aij.h>
211 
212 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
213 {
214   Mat               B;
215   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
216   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
217   const PetscInt    *cols;
218   const PetscScalar *vals;
219   PetscErrorCode    ierr;
220 
221   PetscFunctionBegin;
222   if (A->rmap->bs > 1) {
223     ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr);
224     PetscFunctionReturn(0);
225   }
226 
227   if (reuse == MAT_REUSE_MATRIX) {
228     B = *newmat;
229   } else {
230     /* Can we just use ilen? */
231     ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
232     for (i=0; i<m; i++) {
233       rowlengths[i] = ai[i+1] - ai[i];
234     }
235 
236     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
237     ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
238     ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
239     ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
240     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
241   }
242 
243   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
244 
245   for (row=0; row<m; row++) {
246     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
247     ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
248     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
249   }
250   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
252   B->rmap->bs = A->rmap->bs;
253 
254   if (reuse == MAT_INPLACE_MATRIX) {
255     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
256   } else {
257     *newmat = B;
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
263 {
264   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
265   PetscScalar       *y;
266   const PetscScalar *x;
267   const MatScalar   *aval=a->val;
268   PetscInt          totalslices=a->totalslices;
269   const PetscInt    *acolidx=a->colidx;
270   PetscInt          i,j;
271   PetscErrorCode    ierr;
272 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
273   __m512d           vec_x,vec_y,vec_vals;
274   __m256i           vec_idx;
275   __mmask8          mask;
276   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
277   __m256i           vec_idx2,vec_idx3,vec_idx4;
278 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
279   __m128d           vec_x_tmp;
280   __m256d           vec_x,vec_y,vec_y2,vec_vals;
281   MatScalar         yval;
282   PetscInt          r,rows_left,row,nnz_in_row;
283 #else
284   PetscScalar       sum[8];
285 #endif
286 
287 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
288 #pragma disjoint(*x,*y,*aval)
289 #endif
290 
291   PetscFunctionBegin;
292   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
293   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
294 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
295   for (i=0; i<totalslices; i++) { /* loop over slices */
296     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
297     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
298 
299     vec_y  = _mm512_setzero_pd();
300     vec_y2 = _mm512_setzero_pd();
301     vec_y3 = _mm512_setzero_pd();
302     vec_y4 = _mm512_setzero_pd();
303 
304     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
305     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
306     case 3:
307       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
308       acolidx += 8; aval += 8;
309       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
310       acolidx += 8; aval += 8;
311       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
312       acolidx += 8; aval += 8;
313       j += 3;
314       break;
315     case 2:
316       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
317       acolidx += 8; aval += 8;
318       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
319       acolidx += 8; aval += 8;
320       j += 2;
321       break;
322     case 1:
323       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
324       acolidx += 8; aval += 8;
325       j += 1;
326       break;
327     }
328     #pragma novector
329     for (; j<(a->sliidx[i+1]>>3); j+=4) {
330       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
331       acolidx += 8; aval += 8;
332       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
333       acolidx += 8; aval += 8;
334       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
335       acolidx += 8; aval += 8;
336       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
337       acolidx += 8; aval += 8;
338     }
339 
340     vec_y = _mm512_add_pd(vec_y,vec_y2);
341     vec_y = _mm512_add_pd(vec_y,vec_y3);
342     vec_y = _mm512_add_pd(vec_y,vec_y4);
343     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
344       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
345       _mm512_mask_store_pd(&y[8*i],mask,vec_y);
346     } else {
347       _mm512_store_pd(&y[8*i],vec_y);
348     }
349   }
350 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
351   for (i=0; i<totalslices; i++) { /* loop over full slices */
352     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
353     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
354 
355     vec_y  = _mm256_setzero_pd();
356     vec_y2 = _mm256_setzero_pd();
357 
358     /* last slice may have padding rows. Don't use vectorization. */
359     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
360       rows_left = A->rmap->n - 8*i;
361       for (r=0; r<rows_left; ++r) {
362         yval = (MatScalar)0;
363         row = 8*i + r;
364         nnz_in_row = a->rlen[row];
365         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
366         y[row] = yval;
367       }
368       break;
369     }
370 
371     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
372     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
373       vec_vals  = _mm256_loadu_pd(aval);
374       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
375       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
376       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
377       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
378       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
379       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
380       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
381       aval     += 4;
382 
383       vec_vals  = _mm256_loadu_pd(aval);
384       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
385       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
386       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
387       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
388       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
389       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
390       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
391       aval     += 4;
392     }
393 
394     _mm256_storeu_pd(y + i*8,     vec_y);
395     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
396   }
397 #else
398   for (i=0; i<totalslices; i++) { /* loop over slices */
399     for (j=0; j<8; j++) sum[j] = 0.0;
400     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
401       sum[0] += aval[j] * x[acolidx[j]];
402       sum[1] += aval[j+1] * x[acolidx[j+1]];
403       sum[2] += aval[j+2] * x[acolidx[j+2]];
404       sum[3] += aval[j+3] * x[acolidx[j+3]];
405       sum[4] += aval[j+4] * x[acolidx[j+4]];
406       sum[5] += aval[j+5] * x[acolidx[j+5]];
407       sum[6] += aval[j+6] * x[acolidx[j+6]];
408       sum[7] += aval[j+7] * x[acolidx[j+7]];
409     }
410     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
411       for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
412     } else {
413       for(j=0; j<8; j++) y[8*i+j] = sum[j];
414     }
415   }
416 #endif
417 
418   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
419   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
420   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
425 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
426 {
427   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
428   PetscScalar       *y,*z;
429   const PetscScalar *x;
430   const MatScalar   *aval=a->val;
431   PetscInt          totalslices=a->totalslices;
432   const PetscInt    *acolidx=a->colidx;
433   PetscInt          i,j;
434   PetscErrorCode    ierr;
435 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
436   __m512d           vec_x,vec_y,vec_vals;
437   __m256i           vec_idx;
438   __mmask8          mask;
439   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
440   __m256i           vec_idx2,vec_idx3,vec_idx4;
441 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
442   __m128d           vec_x_tmp;
443   __m256d           vec_x,vec_y,vec_y2,vec_vals;
444   MatScalar         yval;
445   PetscInt          r,row,nnz_in_row;
446 #else
447   PetscScalar       sum[8];
448 #endif
449 
450 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
451 #pragma disjoint(*x,*y,*aval)
452 #endif
453 
454   PetscFunctionBegin;
455   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
456   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
457 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
458   for (i=0; i<totalslices; i++) { /* loop over slices */
459     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
460     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
461 
462     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
463       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
464       vec_y  = _mm512_mask_load_pd(vec_y,mask,&y[8*i]);
465     } else {
466       vec_y  = _mm512_load_pd(&y[8*i]);
467     }
468     vec_y2 = _mm512_setzero_pd();
469     vec_y3 = _mm512_setzero_pd();
470     vec_y4 = _mm512_setzero_pd();
471 
472     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
473     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
474     case 3:
475       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
476       acolidx += 8; aval += 8;
477       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
478       acolidx += 8; aval += 8;
479       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
480       acolidx += 8; aval += 8;
481       j += 3;
482       break;
483     case 2:
484       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
485       acolidx += 8; aval += 8;
486       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
487       acolidx += 8; aval += 8;
488       j += 2;
489       break;
490     case 1:
491       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
492       acolidx += 8; aval += 8;
493       j += 1;
494       break;
495     }
496     #pragma novector
497     for (; j<(a->sliidx[i+1]>>3); j+=4) {
498       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
499       acolidx += 8; aval += 8;
500       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
501       acolidx += 8; aval += 8;
502       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
503       acolidx += 8; aval += 8;
504       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
505       acolidx += 8; aval += 8;
506     }
507 
508     vec_y = _mm512_add_pd(vec_y,vec_y2);
509     vec_y = _mm512_add_pd(vec_y,vec_y3);
510     vec_y = _mm512_add_pd(vec_y,vec_y4);
511     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
512       _mm512_mask_store_pd(&z[8*i],mask,vec_y);
513     } else {
514       _mm512_store_pd(&z[8*i],vec_y);
515     }
516   }
517 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
518   for (i=0; i<totalslices; i++) { /* loop over full slices */
519     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
520     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
521 
522     /* last slice may have padding rows. Don't use vectorization. */
523     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
524       for (r=0; r<(A->rmap->n & 0x07); ++r) {
525         row        = 8*i + r;
526         yval       = (MatScalar)0.0;
527         nnz_in_row = a->rlen[row];
528         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
529         z[row] = y[row] + yval;
530       }
531       break;
532     }
533 
534     vec_y  = _mm256_loadu_pd(y+8*i);
535     vec_y2 = _mm256_loadu_pd(y+8*i+4);
536 
537     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
538     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
539       vec_vals  = _mm256_loadu_pd(aval);
540       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
541       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
542       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
543       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
544       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
545       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
546       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
547       aval     += 4;
548 
549       vec_vals  = _mm256_loadu_pd(aval);
550       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
551       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
552       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
553       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
554       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
555       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
556       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
557       aval     += 4;
558     }
559 
560     _mm256_storeu_pd(z+i*8,vec_y);
561     _mm256_storeu_pd(z+i*8+4,vec_y2);
562   }
563 #else
564   for (i=0; i<totalslices; i++) { /* loop over slices */
565     for (j=0; j<8; j++) sum[j] = 0.0;
566     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
567       sum[0] += aval[j] * x[acolidx[j]];
568       sum[1] += aval[j+1] * x[acolidx[j+1]];
569       sum[2] += aval[j+2] * x[acolidx[j+2]];
570       sum[3] += aval[j+3] * x[acolidx[j+3]];
571       sum[4] += aval[j+4] * x[acolidx[j+4]];
572       sum[5] += aval[j+5] * x[acolidx[j+5]];
573       sum[6] += aval[j+6] * x[acolidx[j+6]];
574       sum[7] += aval[j+7] * x[acolidx[j+7]];
575     }
576     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
577       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
578     } else {
579       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
580     }
581   }
582 #endif
583 
584   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
585   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
586   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
591 {
592   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
593   PetscScalar       *y;
594   const PetscScalar *x;
595   const MatScalar   *aval=a->val;
596   const PetscInt    *acolidx=a->colidx;
597   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
598   PetscErrorCode    ierr;
599 
600 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
601 #pragma disjoint(*x,*y,*aval)
602 #endif
603 
604   PetscFunctionBegin;
605   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
606   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
607   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
608   for (i=0; i<a->totalslices; i++) { /* loop over slices */
609     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
610       for (r=0; r<(A->rmap->n & 0x07); ++r) {
611         row        = 8*i + r;
612         nnz_in_row = a->rlen[row];
613         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
614       }
615       break;
616     }
617     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
618       y[acolidx[j]]   += aval[j] * x[8*i];
619       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
620       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
621       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
622       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
623       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
624       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
625       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
626     }
627   }
628   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
629   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
630   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
635 {
636   PetscErrorCode ierr;
637 
638   PetscFunctionBegin;
639   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
640   ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 /*
645      Checks for missing diagonals
646 */
647 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
648 {
649   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
650   PetscInt    *diag,i;
651 
652   PetscFunctionBegin;
653   *missing = PETSC_FALSE;
654   if (A->rmap->n > 0 && !(a->colidx)) {
655     *missing = PETSC_TRUE;
656     if (d) *d = 0;
657     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
658   } else {
659     diag = a->diag;
660     for (i=0; i<A->rmap->n; i++) {
661       if (diag[i] == -1) {
662         *missing = PETSC_TRUE;
663         if (d) *d = i;
664         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
665         break;
666       }
667     }
668   }
669   PetscFunctionReturn(0);
670 }
671 
672 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
673 {
674   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
675   PetscInt       i,j,m=A->rmap->n,shift;
676   PetscErrorCode ierr;
677 
678   PetscFunctionBegin;
679   if (!a->diag) {
680     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
681     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
682     a->free_diag = PETSC_TRUE;
683   }
684   for (i=0; i<m; i++) { /* loop over rows */
685     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
686     a->diag[i] = -1;
687     for (j=0; j<a->rlen[i]; j++) {
688       if (a->colidx[shift+j*8] == i) {
689         a->diag[i] = shift+j*8;
690         break;
691       }
692     }
693   }
694   PetscFunctionReturn(0);
695 }
696 
697 /*
698   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
699 */
700 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
701 {
702   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
703   PetscInt       i,*diag,m = A->rmap->n;
704   MatScalar      *val = a->val;
705   PetscScalar    *idiag,*mdiag;
706   PetscErrorCode ierr;
707 
708   PetscFunctionBegin;
709   if (a->idiagvalid) PetscFunctionReturn(0);
710   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
711   diag = a->diag;
712   if (!a->idiag) {
713     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
714     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
715     val  = a->val;
716   }
717   mdiag = a->mdiag;
718   idiag = a->idiag;
719 
720   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
721     for (i=0; i<m; i++) {
722       mdiag[i] = val[diag[i]];
723       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
724         if (PetscRealPart(fshift)) {
725           ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
726           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
727           A->factorerror_zeropivot_value = 0.0;
728           A->factorerror_zeropivot_row   = i;
729         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
730       }
731       idiag[i] = 1.0/val[diag[i]];
732     }
733     ierr = PetscLogFlops(m);CHKERRQ(ierr);
734   } else {
735     for (i=0; i<m; i++) {
736       mdiag[i] = val[diag[i]];
737       idiag[i] = omega/(fshift + val[diag[i]]);
738     }
739     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
740   }
741   a->idiagvalid = PETSC_TRUE;
742   PetscFunctionReturn(0);
743 }
744 
745 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
746 {
747   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr);
752   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatDestroy_SeqSELL(Mat A)
757 {
758   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762 #if defined(PETSC_USE_LOG)
763   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
764 #endif
765   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
766   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
767   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
768   ierr = PetscFree(a->diag);CHKERRQ(ierr);
769   ierr = PetscFree(a->ibdiag);CHKERRQ(ierr);
770   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
771   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
772   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
773   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
774   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
775   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
776 
777   ierr = PetscFree(A->data);CHKERRQ(ierr);
778 
779   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
780   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
781   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
782 #if defined(PETSC_HAVE_ELEMENTAL)
783 #endif
784   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
789 {
790   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
791   PetscErrorCode ierr;
792 
793   PetscFunctionBegin;
794   switch (op) {
795   case MAT_ROW_ORIENTED:
796     a->roworiented = flg;
797     break;
798   case MAT_KEEP_NONZERO_PATTERN:
799     a->keepnonzeropattern = flg;
800     break;
801   case MAT_NEW_NONZERO_LOCATIONS:
802     a->nonew = (flg ? 0 : 1);
803     break;
804   case MAT_NEW_NONZERO_LOCATION_ERR:
805     a->nonew = (flg ? -1 : 0);
806     break;
807   case MAT_NEW_NONZERO_ALLOCATION_ERR:
808     a->nonew = (flg ? -2 : 0);
809     break;
810   case MAT_UNUSED_NONZERO_LOCATION_ERR:
811     a->nounused = (flg ? -1 : 0);
812     break;
813   case MAT_NEW_DIAGONALS:
814   case MAT_IGNORE_OFF_PROC_ENTRIES:
815   case MAT_USE_HASH_TABLE:
816     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
817     break;
818   case MAT_SPD:
819   case MAT_SYMMETRIC:
820   case MAT_STRUCTURALLY_SYMMETRIC:
821   case MAT_HERMITIAN:
822   case MAT_SYMMETRY_ETERNAL:
823     /* These options are handled directly by MatSetOption() */
824     break;
825   default:
826     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
832 {
833   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
834   PetscInt       i,j,n,shift;
835   PetscScalar    *x,zero=0.0;
836   PetscErrorCode ierr;
837 
838   PetscFunctionBegin;
839   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
840   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
841 
842   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
843     PetscInt *diag=a->diag;
844     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
845     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
846     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
847     PetscFunctionReturn(0);
848   }
849 
850   ierr = VecSet(v,zero);CHKERRQ(ierr);
851   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
852   for (i=0; i<n; i++) { /* loop over rows */
853     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
854     x[i] = 0;
855     for (j=0; j<a->rlen[i]; j++) {
856       if (a->colidx[shift+j*8] == i) {
857         x[i] = a->val[shift+j*8];
858         break;
859       }
860     }
861   }
862   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
867 {
868   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
869   const PetscScalar *l,*r;
870   PetscInt          i,j,m,n,row;
871   PetscErrorCode    ierr;
872 
873   PetscFunctionBegin;
874   if (ll) {
875     /* The local size is used so that VecMPI can be passed to this routine
876        by MatDiagonalScale_MPISELL */
877     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
878     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
879     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
880     for (i=0; i<a->totalslices; i++) { /* loop over slices */
881       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
882         a->val[j] *= l[8*i+row];
883       }
884     }
885     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
886     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
887   }
888   if (rr) {
889     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
890     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
891     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
892     for (i=0; i<a->totalslices; i++) { /* loop over slices */
893       for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
894         a->val[j] *= r[a->colidx[j]];
895       }
896     }
897     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
898     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
899   }
900   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
905 
906 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
907 {
908   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
909   PetscInt    *cp,i,k,low,high,t,row,col,l;
910   PetscInt    shift;
911   MatScalar   *vp;
912 
913   PetscFunctionBegin;
914   for (k=0; k<m; k++) { /* loop over added rows */
915     row = im[k];
916     if (row<0) continue;
917 #if defined(PETSC_USE_DEBUG)
918     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
919 #endif
920     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
921     cp = a->colidx+shift; /* pointer to the row */
922     vp = a->val+shift; /* pointer to the row */
923     for (l=0; l<n; l++) { /* loop over added rows */
924       col = in[l];
925       if (col<0) continue;
926 #if defined(PETSC_USE_DEBUG)
927       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
928 #endif
929       high = a->rlen[row]; low = 0; /* assume unsorted */
930       while (high-low > 5) {
931         t = (low+high)/2;
932         if (*(cp+t*8) > col) high = t;
933         else low = t;
934       }
935       for (i=low; i<high; i++) {
936         if (*(cp+8*i) > col) break;
937         if (*(cp+8*i) == col) {
938           *v++ = *(vp+8*i);
939           goto finished;
940         }
941       }
942       *v++ = 0.0;
943     finished:;
944     }
945   }
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
950 {
951   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
952   PetscInt          i,j,m=A->rmap->n,shift;
953   const char        *name;
954   PetscViewerFormat format;
955   PetscErrorCode    ierr;
956 
957   PetscFunctionBegin;
958   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
959   if (format == PETSC_VIEWER_ASCII_MATLAB) {
960     PetscInt nofinalvalue = 0;
961     /*
962     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
963       nofinalvalue = 1;
964     }
965     */
966     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
967     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
969 #if defined(PETSC_USE_COMPLEX)
970     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
971 #else
972     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
973 #endif
974     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
975 
976     for (i=0; i<m; i++) {
977       shift = a->sliidx[i>>3]+(i&0x07);
978       for (j=0; j<a->rlen[i]; j++) {
979 #if defined(PETSC_USE_COMPLEX)
980         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
981 #else
982         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
983 #endif
984       }
985     }
986     /*
987     if (nofinalvalue) {
988 #if defined(PETSC_USE_COMPLEX)
989       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
990 #else
991       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
992 #endif
993     }
994     */
995     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
996     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
997     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
998   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
999     PetscFunctionReturn(0);
1000   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1001     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1002     for (i=0; i<m; i++) {
1003       ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1004       shift = a->sliidx[i>>3]+(i&0x07);
1005       for (j=0; j<a->rlen[i]; j++) {
1006 #if defined(PETSC_USE_COMPLEX)
1007         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1008           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1009         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1010           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1011         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1012           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1013         }
1014 #else
1015         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
1016 #endif
1017       }
1018       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1019     }
1020     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1021   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1022     PetscInt    cnt=0,jcnt;
1023     PetscScalar value;
1024 #if defined(PETSC_USE_COMPLEX)
1025     PetscBool   realonly=PETSC_TRUE;
1026     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1027       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1028         realonly = PETSC_FALSE;
1029         break;
1030       }
1031     }
1032 #endif
1033 
1034     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1035     for (i=0; i<m; i++) {
1036       jcnt = 0;
1037       shift = a->sliidx[i>>3]+(i&0x07);
1038       for (j=0; j<A->cmap->n; j++) {
1039         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1040           value = a->val[cnt++];
1041           jcnt++;
1042         } else {
1043           value = 0.0;
1044         }
1045 #if defined(PETSC_USE_COMPLEX)
1046         if (realonly) {
1047           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
1048         } else {
1049           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
1050         }
1051 #else
1052         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
1053 #endif
1054       }
1055       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1056     }
1057     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1058   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1059     PetscInt fshift=1;
1060     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1061 #if defined(PETSC_USE_COMPLEX)
1062     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
1063 #else
1064     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
1065 #endif
1066     ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
1067     for (i=0; i<m; i++) {
1068       shift = a->sliidx[i>>3]+(i&0x07);
1069       for (j=0; j<a->rlen[i]; j++) {
1070 #if defined(PETSC_USE_COMPLEX)
1071         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1072 #else
1073         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1074 #endif
1075       }
1076     }
1077     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1078   } else {
1079     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1080     if (A->factortype) {
1081       for (i=0; i<m; i++) {
1082         shift = a->sliidx[i>>3]+(i&0x07);
1083         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1084         /* L part */
1085         for (j=shift; j<a->diag[i]; j+=8) {
1086 #if defined(PETSC_USE_COMPLEX)
1087           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1088             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1089           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1090             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1091           } else {
1092             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1093           }
1094 #else
1095           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1096 #endif
1097         }
1098         /* diagonal */
1099         j = a->diag[i];
1100 #if defined(PETSC_USE_COMPLEX)
1101         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1102           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1103         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1104           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1105         } else {
1106           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1107         }
1108 #else
1109         ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1110 #endif
1111 
1112         /* U part */
1113         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1114 #if defined(PETSC_USE_COMPLEX)
1115           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1116             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1117           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1118             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1119           } else {
1120             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1121           }
1122 #else
1123           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1124 #endif
1125         }
1126         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1127       }
1128     } else {
1129       for (i=0; i<m; i++) {
1130         shift = a->sliidx[i>>3]+(i&0x07);
1131         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1132         for (j=0; j<a->rlen[i]; j++) {
1133 #if defined(PETSC_USE_COMPLEX)
1134           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1135             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1136           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1137             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1138           } else {
1139             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1140           }
1141 #else
1142           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1143 #endif
1144         }
1145         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1146       }
1147     }
1148     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1149   }
1150   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #include <petscdraw.h>
1155 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1156 {
1157   Mat               A=(Mat)Aa;
1158   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1159   PetscInt          i,j,m=A->rmap->n,shift;
1160   int               color;
1161   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1162   PetscViewer       viewer;
1163   PetscViewerFormat format;
1164   PetscErrorCode    ierr;
1165 
1166   PetscFunctionBegin;
1167   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1168   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1169   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1170 
1171   /* loop over matrix elements drawing boxes */
1172 
1173   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1174     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1175     /* Blue for negative, Cyan for zero and  Red for positive */
1176     color = PETSC_DRAW_BLUE;
1177     for (i=0; i<m; i++) {
1178       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1179       y_l = m - i - 1.0; y_r = y_l + 1.0;
1180       for (j=0; j<a->rlen[i]; j++) {
1181         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1182         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1183         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1184       }
1185     }
1186     color = PETSC_DRAW_CYAN;
1187     for (i=0; i<m; i++) {
1188       shift = a->sliidx[i>>3]+(i&0x07);
1189       y_l = m - i - 1.0; y_r = y_l + 1.0;
1190       for (j=0; j<a->rlen[i]; j++) {
1191         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1192         if (a->val[shift+8*j] !=  0.) continue;
1193         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1194       }
1195     }
1196     color = PETSC_DRAW_RED;
1197     for (i=0; i<m; i++) {
1198       shift = a->sliidx[i>>3]+(i&0x07);
1199       y_l = m - i - 1.0; y_r = y_l + 1.0;
1200       for (j=0; j<a->rlen[i]; j++) {
1201         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1202         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1203         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1204       }
1205     }
1206     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1207   } else {
1208     /* use contour shading to indicate magnitude of values */
1209     /* first determine max of all nonzero values */
1210     PetscReal minv=0.0,maxv=0.0;
1211     PetscInt  count=0;
1212     PetscDraw popup;
1213     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1214       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1215     }
1216     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1217     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1218     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1219 
1220     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1221     for (i=0; i<m; i++) {
1222       shift = a->sliidx[i>>3]+(i&0x07);
1223       y_l = m - i - 1.0;
1224       y_r = y_l + 1.0;
1225       for (j=0; j<a->rlen[i]; j++) {
1226         x_l = a->colidx[shift+j*8];
1227         x_r = x_l + 1.0;
1228         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1229         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1230         count++;
1231       }
1232     }
1233     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #include <petscdraw.h>
1239 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1240 {
1241   PetscDraw      draw;
1242   PetscReal      xr,yr,xl,yl,h,w;
1243   PetscBool      isnull;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1248   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1249   if (isnull) PetscFunctionReturn(0);
1250 
1251   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1252   xr  += w;          yr += h;         xl = -w;     yl = -h;
1253   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1254   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1255   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1256   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1257   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1262 {
1263   PetscBool      iascii,isbinary,isdraw;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1268   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1269   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1270   if (iascii) {
1271     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1272   } else if (isbinary) {
1273     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1274   } else if (isdraw) {
1275     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1281 {
1282   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1283   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1284   MatScalar      *vp;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1289   /* To do: compress out the unused elements */
1290   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1291   ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1292   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1293   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1294   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1295   for (i=0; i<a->totalslices; ++i) {
1296     shift = a->sliidx[i];    /* starting index of the slice */
1297     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1298     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1299     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1300       row  = 8*i + row_in_slice;
1301       nrow = a->rlen[row]; /* number of nonzeros in row */
1302       /*
1303         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1304         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1305       */
1306       lastcol = 0;
1307       if (nrow>0) { /* nonempty row */
1308         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1309       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1310         for (j=1;j<8;j++) {
1311           if (a->rlen[8*i+j]) {
1312             lastcol = cp[j];
1313             break;
1314           }
1315         }
1316       } else {
1317         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1318       }
1319 
1320       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1321         cp[8*k+row_in_slice] = lastcol;
1322         vp[8*k+row_in_slice] = (MatScalar)0;
1323       }
1324     }
1325   }
1326 
1327   A->info.mallocs += a->reallocs;
1328   a->reallocs      = 0;
1329 
1330   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1335 {
1336   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1337 
1338   PetscFunctionBegin;
1339   info->block_size   = 1.0;
1340   info->nz_allocated = (double)a->maxallocmat;
1341   info->nz_used      = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1342   info->nz_unneeded  = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1343   info->assemblies   = (double)A->num_ass;
1344   info->mallocs      = (double)A->info.mallocs;
1345   info->memory       = ((PetscObject)A)->mem;
1346   if (A->factortype) {
1347     info->fill_ratio_given  = A->info.fill_ratio_given;
1348     info->fill_ratio_needed = A->info.fill_ratio_needed;
1349     info->factor_mallocs    = A->info.factor_mallocs;
1350   } else {
1351     info->fill_ratio_given  = 0;
1352     info->fill_ratio_needed = 0;
1353     info->factor_mallocs    = 0;
1354   }
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1359 {
1360   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1361   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1362   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1363   MatScalar      *vp,value;
1364   PetscErrorCode ierr;
1365 
1366   PetscFunctionBegin;
1367   for (k=0; k<m; k++) { /* loop over added rows */
1368     row = im[k];
1369     if (row < 0) continue;
1370 #if defined(PETSC_USE_DEBUG)
1371     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1372 #endif
1373     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1374     cp    = a->colidx+shift; /* pointer to the row */
1375     vp    = a->val+shift; /* pointer to the row */
1376     nrow  = a->rlen[row];
1377     low   = 0;
1378     high  = nrow;
1379 
1380     for (l=0; l<n; l++) { /* loop over added columns */
1381       col = in[l];
1382       if (col<0) continue;
1383 #if defined(PETSC_USE_DEBUG)
1384       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1385 #endif
1386       if (a->roworiented) {
1387         value = v[l+k*n];
1388       } else {
1389         value = v[k+l*m];
1390       }
1391       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1392 
1393       /* search in this row for the specified colmun, i indicates the column to be set */
1394       if (col <= lastcol) low = 0;
1395       else high = nrow;
1396       lastcol = col;
1397       while (high-low > 5) {
1398         t = (low+high)/2;
1399         if (*(cp+t*8) > col) high = t;
1400         else low = t;
1401       }
1402       for (i=low; i<high; i++) {
1403         if (*(cp+i*8) > col) break;
1404         if (*(cp+i*8) == col) {
1405           if (is == ADD_VALUES) *(vp+i*8) += value;
1406           else *(vp+i*8) = value;
1407           low = i + 1;
1408           goto noinsert;
1409         }
1410       }
1411       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1412       if (nonew == 1) goto noinsert;
1413       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1414       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1415       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1416       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1417       for (ii=nrow-1; ii>=i; ii--) {
1418         *(cp+(ii+1)*8) = *(cp+ii*8);
1419         *(vp+(ii+1)*8) = *(vp+ii*8);
1420       }
1421       a->rlen[row]++;
1422       *(cp+i*8) = col;
1423       *(vp+i*8) = value;
1424       a->nz++;
1425       A->nonzerostate++;
1426       low = i+1; high++; nrow++;
1427 noinsert:;
1428     }
1429     a->rlen[row] = nrow;
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1435 {
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   /* If the two matrices have the same copy implementation, use fast copy. */
1440   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1441     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1442     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1443 
1444     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1445     ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1446   } else {
1447     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1453 {
1454   PetscErrorCode ierr;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr);
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1462 {
1463   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1464 
1465   PetscFunctionBegin;
1466   *array = a->val;
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1471 {
1472   PetscFunctionBegin;
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1477 {
1478   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1479   PetscInt    i;
1480   MatScalar   *aval=a->val;
1481 
1482   PetscFunctionBegin;
1483   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1488 {
1489   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1490   PetscInt       i;
1491   MatScalar      *aval=a->val;
1492   PetscErrorCode ierr;
1493 
1494   PetscFunctionBegin;
1495   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1496   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1501 {
1502   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1503   MatScalar      *aval=a->val;
1504   PetscScalar    oalpha=alpha;
1505   PetscBLASInt   one=1,size;
1506   PetscErrorCode ierr;
1507 
1508   PetscFunctionBegin;
1509   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1510   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1511   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1512   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1517 {
1518   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   if (!Y->preallocated || !y->nz) {
1523     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1524   }
1525   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1530 {
1531   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1532   PetscScalar       *x,sum,*t;
1533   const MatScalar   *idiag=0,*mdiag;
1534   const PetscScalar *b,*xb;
1535   PetscInt          n,m=A->rmap->n,i,j,shift;
1536   const PetscInt    *diag;
1537   PetscErrorCode    ierr;
1538 
1539   PetscFunctionBegin;
1540   its = its*lits;
1541 
1542   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1543   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1544   a->fshift = fshift;
1545   a->omega  = omega;
1546 
1547   diag  = a->diag;
1548   t     = a->ssor_work;
1549   idiag = a->idiag;
1550   mdiag = a->mdiag;
1551 
1552   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1553   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1554   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1555   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1556   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1557   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1558 
1559   if (flag & SOR_ZERO_INITIAL_GUESS) {
1560     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1561       for (i=0; i<m; i++) {
1562         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1563         sum   = b[i];
1564         n     = (diag[i]-shift)/8;
1565         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1566         t[i]  = sum;
1567         x[i]  = sum*idiag[i];
1568       }
1569       xb   = t;
1570       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1571     } else xb = b;
1572     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1573       for (i=m-1; i>=0; i--) {
1574         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1575         sum   = xb[i];
1576         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1577         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1578         if (xb == b) {
1579           x[i] = sum*idiag[i];
1580         } else {
1581           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1582         }
1583       }
1584       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1585     }
1586     its--;
1587   }
1588   while (its--) {
1589     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1590       for (i=0; i<m; i++) {
1591         /* lower */
1592         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1593         sum   = b[i];
1594         n     = (diag[i]-shift)/8;
1595         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1596         t[i]  = sum;             /* save application of the lower-triangular part */
1597         /* upper */
1598         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1599         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1600         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1601       }
1602       xb   = t;
1603       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1604     } else xb = b;
1605     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1606       for (i=m-1; i>=0; i--) {
1607         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1608         sum = xb[i];
1609         if (xb == b) {
1610           /* whole matrix (no checkpointing available) */
1611           n     = a->rlen[i];
1612           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1613           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1614         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1615           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1616           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1617           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1618         }
1619       }
1620       if (xb == b) {
1621         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1622       } else {
1623         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1624       }
1625     }
1626   }
1627   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1628   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 /* -------------------------------------------------------------------*/
1633 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1634                                        0,
1635                                        0,
1636                                        MatMult_SeqSELL,
1637                                /* 4*/  MatMultAdd_SeqSELL,
1638                                        MatMultTranspose_SeqSELL,
1639                                        MatMultTransposeAdd_SeqSELL,
1640                                        0,
1641                                        0,
1642                                        0,
1643                                /* 10*/ 0,
1644                                        0,
1645                                        0,
1646                                        MatSOR_SeqSELL,
1647                                        0,
1648                                /* 15*/ MatGetInfo_SeqSELL,
1649                                        MatEqual_SeqSELL,
1650                                        MatGetDiagonal_SeqSELL,
1651                                        MatDiagonalScale_SeqSELL,
1652                                        0,
1653                                /* 20*/ 0,
1654                                        MatAssemblyEnd_SeqSELL,
1655                                        MatSetOption_SeqSELL,
1656                                        MatZeroEntries_SeqSELL,
1657                                /* 24*/ 0,
1658                                        0,
1659                                        0,
1660                                        0,
1661                                        0,
1662                                /* 29*/ MatSetUp_SeqSELL,
1663                                        0,
1664                                        0,
1665                                        0,
1666                                        0,
1667                                /* 34*/ MatDuplicate_SeqSELL,
1668                                        0,
1669                                        0,
1670                                        0,
1671                                        0,
1672                                /* 39*/ 0,
1673                                        0,
1674                                        0,
1675                                        MatGetValues_SeqSELL,
1676                                        MatCopy_SeqSELL,
1677                                /* 44*/ 0,
1678                                        MatScale_SeqSELL,
1679                                        MatShift_SeqSELL,
1680                                        0,
1681                                        0,
1682                                /* 49*/ 0,
1683                                        0,
1684                                        0,
1685                                        0,
1686                                        0,
1687                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1688                                        0,
1689                                        0,
1690                                        0,
1691                                        0,
1692                                /* 59*/ 0,
1693                                        MatDestroy_SeqSELL,
1694                                        MatView_SeqSELL,
1695                                        0,
1696                                        0,
1697                                /* 64*/ 0,
1698                                        0,
1699                                        0,
1700                                        0,
1701                                        0,
1702                                /* 69*/ 0,
1703                                        0,
1704                                        0,
1705                                        0,
1706                                        0,
1707                                /* 74*/ 0,
1708                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1709                                        0,
1710                                        0,
1711                                        0,
1712                                /* 79*/ 0,
1713                                        0,
1714                                        0,
1715                                        0,
1716                                        0,
1717                                /* 84*/ 0,
1718                                        0,
1719                                        0,
1720                                        0,
1721                                        0,
1722                                /* 89*/ 0,
1723                                        0,
1724                                        0,
1725                                        0,
1726                                        0,
1727                                /* 94*/ 0,
1728                                        0,
1729                                        0,
1730                                        0,
1731                                        0,
1732                                /* 99*/ 0,
1733                                        0,
1734                                        0,
1735                                        MatConjugate_SeqSELL,
1736                                        0,
1737                                /*104*/ 0,
1738                                        0,
1739                                        0,
1740                                        0,
1741                                        0,
1742                                /*109*/ 0,
1743                                        0,
1744                                        0,
1745                                        0,
1746                                        MatMissingDiagonal_SeqSELL,
1747                                /*114*/ 0,
1748                                        0,
1749                                        0,
1750                                        0,
1751                                        0,
1752                                /*119*/ 0,
1753                                        0,
1754                                        0,
1755                                        0,
1756                                        0,
1757                                /*124*/ 0,
1758                                        0,
1759                                        0,
1760                                        0,
1761                                        0,
1762                                /*129*/ 0,
1763                                        0,
1764                                        0,
1765                                        0,
1766                                        0,
1767                                /*134*/ 0,
1768                                        0,
1769                                        0,
1770                                        0,
1771                                        0,
1772                                /*139*/ 0,
1773                                        0,
1774                                        0,
1775                                        MatFDColoringSetUp_SeqXAIJ,
1776                                        0,
1777                                 /*144*/0
1778 };
1779 
1780 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1781 {
1782   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1787 
1788   /* allocate space for values if not already there */
1789   if (!a->saved_values) {
1790     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1791     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1792   }
1793 
1794   /* copy values over */
1795   ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1800 {
1801   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1802   PetscErrorCode ierr;
1803 
1804   PetscFunctionBegin;
1805   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1806   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1807   /* copy values over */
1808   ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 /*@C
1813  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1814 
1815  Not Collective
1816 
1817  Input Parameters:
1818  .  mat - a MATSEQSELL matrix
1819  .  array - pointer to the data
1820 
1821  Level: intermediate
1822 
1823  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1824  @*/
1825 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1826 {
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1835 {
1836   Mat_SeqSELL    *b;
1837   PetscMPIInt    size;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1842   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1843 
1844   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1845 
1846   B->data = (void*)b;
1847 
1848   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1849 
1850   b->row                = 0;
1851   b->col                = 0;
1852   b->icol               = 0;
1853   b->reallocs           = 0;
1854   b->ignorezeroentries  = PETSC_FALSE;
1855   b->roworiented        = PETSC_TRUE;
1856   b->nonew              = 0;
1857   b->diag               = 0;
1858   b->solve_work         = 0;
1859   B->spptr              = 0;
1860   b->saved_values       = 0;
1861   b->idiag              = 0;
1862   b->mdiag              = 0;
1863   b->ssor_work          = 0;
1864   b->omega              = 1.0;
1865   b->fshift             = 0.0;
1866   b->idiagvalid         = PETSC_FALSE;
1867   b->ibdiagvalid        = PETSC_FALSE;
1868   b->keepnonzeropattern = PETSC_FALSE;
1869 
1870   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1871   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 /*
1881  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1882  */
1883 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1884 {
1885   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1886   PetscInt       i,m=A->rmap->n;
1887   PetscInt       totalslices=a->totalslices;
1888   PetscErrorCode ierr;
1889 
1890   PetscFunctionBegin;
1891   c = (Mat_SeqSELL*)C->data;
1892 
1893   C->factortype = A->factortype;
1894   c->row        = 0;
1895   c->col        = 0;
1896   c->icol       = 0;
1897   c->reallocs   = 0;
1898 
1899   C->assembled = PETSC_TRUE;
1900 
1901   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
1902   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
1903 
1904   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
1905   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1906   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
1907   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
1908 
1909   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
1910   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
1911 
1912   /* allocate the matrix space */
1913   if (mallocmatspace) {
1914     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
1915     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1916 
1917     c->singlemalloc = PETSC_TRUE;
1918 
1919     if (m > 0) {
1920       ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr);
1921       if (cpvalues == MAT_COPY_VALUES) {
1922         ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1923       } else {
1924         ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1925       }
1926     }
1927   }
1928 
1929   c->ignorezeroentries = a->ignorezeroentries;
1930   c->roworiented       = a->roworiented;
1931   c->nonew             = a->nonew;
1932   if (a->diag) {
1933     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
1934     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1935     for (i=0; i<m; i++) {
1936       c->diag[i] = a->diag[i];
1937     }
1938   } else c->diag = 0;
1939 
1940   c->solve_work         = 0;
1941   c->saved_values       = 0;
1942   c->idiag              = 0;
1943   c->ssor_work          = 0;
1944   c->keepnonzeropattern = a->keepnonzeropattern;
1945   c->free_val           = PETSC_TRUE;
1946   c->free_colidx        = PETSC_TRUE;
1947 
1948   c->maxallocmat  = a->maxallocmat;
1949   c->maxallocrow  = a->maxallocrow;
1950   c->rlenmax      = a->rlenmax;
1951   c->nz           = a->nz;
1952   C->preallocated = PETSC_TRUE;
1953 
1954   c->nonzerorowcnt = a->nonzerorowcnt;
1955   C->nonzerostate  = A->nonzerostate;
1956 
1957   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
1962 {
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
1967   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
1968   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
1969     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
1970   }
1971   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1972   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 /*@C
1977  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
1978 
1979  Collective on MPI_Comm
1980 
1981  Input Parameters:
1982  +  comm - MPI communicator, set to PETSC_COMM_SELF
1983  .  m - number of rows
1984  .  n - number of columns
1985  .  rlenmax - maximum number of nonzeros in a row
1986  -  rlen - array containing the number of nonzeros in the various rows
1987  (possibly different for each row) or NULL
1988 
1989  Output Parameter:
1990  .  A - the matrix
1991 
1992  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1993  MatXXXXSetPreallocation() paradgm instead of this routine directly.
1994  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
1995 
1996  Notes:
1997  If nnz is given then nz is ignored
1998 
1999  Specify the preallocated storage with either rlenmax or rlen (not both).
2000  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2001  allocation.  For large problems you MUST preallocate memory or you
2002  will get TERRIBLE performance, see the users' manual chapter on matrices.
2003 
2004  Level: intermediate
2005 
2006  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
2007 
2008  @*/
2009 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2010 {
2011   PetscErrorCode ierr;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2015   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2016   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
2017   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2022 {
2023   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2024   PetscInt       totalslices=a->totalslices;
2025   PetscErrorCode ierr;
2026 
2027   PetscFunctionBegin;
2028   /* If the  matrix dimensions are not equal,or no of nonzeros */
2029   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2030     *flg = PETSC_FALSE;
2031     PetscFunctionReturn(0);
2032   }
2033   /* if the a->colidx are the same */
2034   ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr);
2035   if (!*flg) PetscFunctionReturn(0);
2036   /* if a->val are the same */
2037   ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2042 {
2043   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2044 
2045   PetscFunctionBegin;
2046   a->idiagvalid  = PETSC_FALSE;
2047   a->ibdiagvalid = PETSC_FALSE;
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 PetscErrorCode MatConjugate_SeqSELL(Mat A)
2052 {
2053 #if defined(PETSC_USE_COMPLEX)
2054   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2055   PetscInt    i;
2056   PetscScalar *val = a->val;
2057 
2058   PetscFunctionBegin;
2059   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2060     val[i] = PetscConj(val[i]);
2061   }
2062 #else
2063   PetscFunctionBegin;
2064 #endif
2065   PetscFunctionReturn(0);
2066 }
2067