1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 9 10 #include <immintrin.h> 11 12 #if !defined(_MM_SCALE_8) 13 #define _MM_SCALE_8 8 14 #endif 15 16 #if defined(__AVX512F__) 17 /* these do not work 18 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 19 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 20 */ 21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 22 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 23 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 24 vec_vals = _mm512_loadu_pd(aval); \ 25 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 26 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 27 #elif defined(__AVX2__) && defined(__FMA__) 28 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 29 vec_vals = _mm256_loadu_pd(aval); \ 30 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 31 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 32 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 33 #endif 34 #endif /* PETSC_HAVE_IMMINTRIN_H */ 35 36 /*@C 37 MatSeqSELLSetPreallocation - For good matrix assembly performance 38 the user should preallocate the matrix storage by setting the parameter nz 39 (or the array nnz). By setting these parameters accurately, performance 40 during matrix assembly can be increased significantly. 41 42 Collective on MPI_Comm 43 44 Input Parameters: 45 + B - The matrix 46 . nz - number of nonzeros per row (same for all rows) 47 - nnz - array containing the number of nonzeros in the various rows 48 (possibly different for each row) or NULL 49 50 Notes: 51 If nnz is given then nz is ignored. 52 53 Specify the preallocated storage with either nz or nnz (not both). 54 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 55 allocation. For large problems you MUST preallocate memory or you 56 will get TERRIBLE performance, see the users' manual chapter on matrices. 57 58 You can call MatGetInfo() to get information on how effective the preallocation was; 59 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 60 You can also run with the option -info and look for messages with the string 61 malloc in them to see if additional memory allocation was needed. 62 63 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 64 entries or columns indices. 65 66 The maximum number of nonzeos in any row should be as accuate as possible. 67 If it is underesitmated, you will get bad performance due to reallocation 68 (MatSeqXSELLReallocateSELL). 69 70 Level: intermediate 71 72 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 73 74 @*/ 75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 81 PetscValidType(B,1); 82 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 87 { 88 Mat_SeqSELL *b; 89 PetscInt i,j,totalslices; 90 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 95 if (maxallocrow == MAT_SKIP_ALLOCATION) { 96 skipallocation = PETSC_TRUE; 97 maxallocrow = 0; 98 } 99 100 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 101 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 102 103 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 104 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 105 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 106 if (rlen) { 107 for (i=0; i<B->rmap->n; i++) { 108 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 109 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 110 } 111 } 112 113 B->preallocated = PETSC_TRUE; 114 115 b = (Mat_SeqSELL*)B->data; 116 117 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 118 b->totalslices = totalslices; 119 if (!skipallocation) { 120 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 121 122 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 123 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 125 } 126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 128 else if (maxallocrow < 0) maxallocrow = 1; 129 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 130 } else { 131 maxallocrow = 0; 132 b->sliidx[0] = 0; 133 for (i=1; i<totalslices; i++) { 134 b->sliidx[i] = 0; 135 for (j=0;j<8;j++) { 136 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 137 } 138 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 139 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 140 } 141 /* last slice */ 142 b->sliidx[totalslices] = 0; 143 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 144 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 145 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 146 } 147 148 /* allocate space for val, colidx, rlen */ 149 /* FIXME: should B's old memory be unlogged? */ 150 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 151 /* FIXME: assuming an element of the bit array takes 8 bits */ 152 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 153 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 154 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 155 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 156 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 157 158 b->singlemalloc = PETSC_TRUE; 159 b->free_val = PETSC_TRUE; 160 b->free_colidx = PETSC_TRUE; 161 } else { 162 b->free_val = PETSC_FALSE; 163 b->free_colidx = PETSC_FALSE; 164 } 165 166 b->nz = 0; 167 b->maxallocrow = maxallocrow; 168 b->rlenmax = maxallocrow; 169 b->maxallocmat = b->sliidx[totalslices]; 170 B->info.nz_unneeded = (double)b->maxallocmat; 171 if (realalloc) { 172 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 178 { 179 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 180 PetscInt shift; 181 182 PetscFunctionBegin; 183 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 184 if (nz) *nz = a->rlen[row]; 185 shift = a->sliidx[row>>3]+(row&0x07); 186 if (!a->getrowcols) { 187 PetscErrorCode ierr; 188 189 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 190 } 191 if (idx) { 192 PetscInt j; 193 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 194 *idx = a->getrowcols; 195 } 196 if (v) { 197 PetscInt j; 198 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 199 *v = a->getrowvals; 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 205 { 206 PetscFunctionBegin; 207 PetscFunctionReturn(0); 208 } 209 210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 211 { 212 Mat B; 213 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 214 PetscInt i; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (reuse == MAT_REUSE_MATRIX) { 219 B = *newmat; 220 ierr = MatZeroEntries(B);CHKERRQ(ierr); 221 } else { 222 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 223 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 224 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 226 } 227 228 for (i=0; i<A->rmap->n; i++) { 229 PetscInt nz,*cols; 230 PetscScalar *vals; 231 232 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 233 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 234 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 235 } 236 237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 B->rmap->bs = A->rmap->bs; 240 241 if (reuse == MAT_INPLACE_MATRIX) { 242 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 243 } else { 244 *newmat = B; 245 } 246 PetscFunctionReturn(0); 247 } 248 249 #include <../src/mat/impls/aij/seq/aij.h> 250 251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 252 { 253 Mat B; 254 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 255 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 256 const PetscInt *cols; 257 const PetscScalar *vals; 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 if (A->rmap->bs > 1) { 262 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266 if (reuse == MAT_REUSE_MATRIX) { 267 B = *newmat; 268 } else { 269 /* Can we just use ilen? */ 270 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 271 for (i=0; i<m; i++) { 272 rowlengths[i] = ai[i+1] - ai[i]; 273 } 274 275 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 276 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 277 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 278 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 279 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 280 } 281 282 for (row=0; row<m; row++) { 283 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 284 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 285 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 286 } 287 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 289 B->rmap->bs = A->rmap->bs; 290 291 if (reuse == MAT_INPLACE_MATRIX) { 292 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 293 } else { 294 *newmat = B; 295 } 296 PetscFunctionReturn(0); 297 } 298 299 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 300 { 301 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 302 PetscScalar *y; 303 const PetscScalar *x; 304 const MatScalar *aval=a->val; 305 PetscInt totalslices=a->totalslices; 306 const PetscInt *acolidx=a->colidx; 307 PetscInt i,j; 308 PetscErrorCode ierr; 309 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 310 __m512d vec_x,vec_y,vec_vals; 311 __m256i vec_idx; 312 __mmask8 mask; 313 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 314 __m256i vec_idx2,vec_idx3,vec_idx4; 315 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 316 __m128i vec_idx; 317 __m256d vec_x,vec_y,vec_y2,vec_vals; 318 MatScalar yval; 319 PetscInt r,rows_left,row,nnz_in_row; 320 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 321 __m128d vec_x_tmp; 322 __m256d vec_x,vec_y,vec_y2,vec_vals; 323 MatScalar yval; 324 PetscInt r,rows_left,row,nnz_in_row; 325 #else 326 PetscScalar sum[8]; 327 #endif 328 329 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 330 #pragma disjoint(*x,*y,*aval) 331 #endif 332 333 PetscFunctionBegin; 334 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 335 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 336 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 337 for (i=0; i<totalslices; i++) { /* loop over slices */ 338 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 339 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 340 341 vec_y = _mm512_setzero_pd(); 342 vec_y2 = _mm512_setzero_pd(); 343 vec_y3 = _mm512_setzero_pd(); 344 vec_y4 = _mm512_setzero_pd(); 345 346 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 347 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 348 case 3: 349 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 350 acolidx += 8; aval += 8; 351 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 352 acolidx += 8; aval += 8; 353 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 354 acolidx += 8; aval += 8; 355 j += 3; 356 break; 357 case 2: 358 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 359 acolidx += 8; aval += 8; 360 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 361 acolidx += 8; aval += 8; 362 j += 2; 363 break; 364 case 1: 365 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 366 acolidx += 8; aval += 8; 367 j += 1; 368 break; 369 } 370 #pragma novector 371 for (; j<(a->sliidx[i+1]>>3); j+=4) { 372 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 373 acolidx += 8; aval += 8; 374 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 375 acolidx += 8; aval += 8; 376 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 377 acolidx += 8; aval += 8; 378 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 379 acolidx += 8; aval += 8; 380 } 381 382 vec_y = _mm512_add_pd(vec_y,vec_y2); 383 vec_y = _mm512_add_pd(vec_y,vec_y3); 384 vec_y = _mm512_add_pd(vec_y,vec_y4); 385 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 386 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 387 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 388 } else { 389 _mm512_storeu_pd(&y[8*i],vec_y); 390 } 391 } 392 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 393 for (i=0; i<totalslices; i++) { /* loop over full slices */ 394 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 395 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 396 397 /* last slice may have padding rows. Don't use vectorization. */ 398 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 399 rows_left = A->rmap->n - 8*i; 400 for (r=0; r<rows_left; ++r) { 401 yval = (MatScalar)0; 402 row = 8*i + r; 403 nnz_in_row = a->rlen[row]; 404 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 405 y[row] = yval; 406 } 407 break; 408 } 409 410 vec_y = _mm256_setzero_pd(); 411 vec_y2 = _mm256_setzero_pd(); 412 413 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 414 #pragma novector 415 #pragma unroll(2) 416 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 417 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 418 aval += 4; acolidx += 4; 419 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 420 aval += 4; acolidx += 4; 421 } 422 423 _mm256_storeu_pd(y+i*8,vec_y); 424 _mm256_storeu_pd(y+i*8+4,vec_y2); 425 } 426 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 427 for (i=0; i<totalslices; i++) { /* loop over full slices */ 428 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 429 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 430 431 vec_y = _mm256_setzero_pd(); 432 vec_y2 = _mm256_setzero_pd(); 433 434 /* last slice may have padding rows. Don't use vectorization. */ 435 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 436 rows_left = A->rmap->n - 8*i; 437 for (r=0; r<rows_left; ++r) { 438 yval = (MatScalar)0; 439 row = 8*i + r; 440 nnz_in_row = a->rlen[row]; 441 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 442 y[row] = yval; 443 } 444 break; 445 } 446 447 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 448 #pragma novector 449 #pragma unroll(2) 450 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 451 vec_vals = _mm256_loadu_pd(aval); 452 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 453 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 454 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 455 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 456 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 457 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 458 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 459 aval += 4; 460 461 vec_vals = _mm256_loadu_pd(aval); 462 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 463 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 464 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 465 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 466 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 467 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 468 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 469 aval += 4; 470 } 471 472 _mm256_storeu_pd(y + i*8, vec_y); 473 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 474 } 475 #else 476 for (i=0; i<totalslices; i++) { /* loop over slices */ 477 for (j=0; j<8; j++) sum[j] = 0.0; 478 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 479 sum[0] += aval[j] * x[acolidx[j]]; 480 sum[1] += aval[j+1] * x[acolidx[j+1]]; 481 sum[2] += aval[j+2] * x[acolidx[j+2]]; 482 sum[3] += aval[j+3] * x[acolidx[j+3]]; 483 sum[4] += aval[j+4] * x[acolidx[j+4]]; 484 sum[5] += aval[j+5] * x[acolidx[j+5]]; 485 sum[6] += aval[j+6] * x[acolidx[j+6]]; 486 sum[7] += aval[j+7] * x[acolidx[j+7]]; 487 } 488 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 489 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 490 } else { 491 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 492 } 493 } 494 #endif 495 496 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 497 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 498 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 503 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 504 { 505 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 506 PetscScalar *y,*z; 507 const PetscScalar *x; 508 const MatScalar *aval=a->val; 509 PetscInt totalslices=a->totalslices; 510 const PetscInt *acolidx=a->colidx; 511 PetscInt i,j; 512 PetscErrorCode ierr; 513 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 514 __m512d vec_x,vec_y,vec_vals; 515 __m256i vec_idx; 516 __mmask8 mask; 517 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 518 __m256i vec_idx2,vec_idx3,vec_idx4; 519 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 520 __m128d vec_x_tmp; 521 __m256d vec_x,vec_y,vec_y2,vec_vals; 522 MatScalar yval; 523 PetscInt r,row,nnz_in_row; 524 #else 525 PetscScalar sum[8]; 526 #endif 527 528 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 529 #pragma disjoint(*x,*y,*aval) 530 #endif 531 532 PetscFunctionBegin; 533 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 534 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 535 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 536 for (i=0; i<totalslices; i++) { /* loop over slices */ 537 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 538 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 539 540 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 541 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 542 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 543 } else { 544 vec_y = _mm512_loadu_pd(&y[8*i]); 545 } 546 vec_y2 = _mm512_setzero_pd(); 547 vec_y3 = _mm512_setzero_pd(); 548 vec_y4 = _mm512_setzero_pd(); 549 550 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 551 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 552 case 3: 553 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 554 acolidx += 8; aval += 8; 555 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 556 acolidx += 8; aval += 8; 557 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 558 acolidx += 8; aval += 8; 559 j += 3; 560 break; 561 case 2: 562 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 563 acolidx += 8; aval += 8; 564 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 565 acolidx += 8; aval += 8; 566 j += 2; 567 break; 568 case 1: 569 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 570 acolidx += 8; aval += 8; 571 j += 1; 572 break; 573 } 574 #pragma novector 575 for (; j<(a->sliidx[i+1]>>3); j+=4) { 576 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 577 acolidx += 8; aval += 8; 578 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 579 acolidx += 8; aval += 8; 580 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 581 acolidx += 8; aval += 8; 582 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 583 acolidx += 8; aval += 8; 584 } 585 586 vec_y = _mm512_add_pd(vec_y,vec_y2); 587 vec_y = _mm512_add_pd(vec_y,vec_y3); 588 vec_y = _mm512_add_pd(vec_y,vec_y4); 589 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 590 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 591 } else { 592 _mm512_storeu_pd(&z[8*i],vec_y); 593 } 594 } 595 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 596 for (i=0; i<totalslices; i++) { /* loop over full slices */ 597 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 598 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 599 600 /* last slice may have padding rows. Don't use vectorization. */ 601 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 602 for (r=0; r<(A->rmap->n & 0x07); ++r) { 603 row = 8*i + r; 604 yval = (MatScalar)0.0; 605 nnz_in_row = a->rlen[row]; 606 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 607 z[row] = y[row] + yval; 608 } 609 break; 610 } 611 612 vec_y = _mm256_loadu_pd(y+8*i); 613 vec_y2 = _mm256_loadu_pd(y+8*i+4); 614 615 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 616 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 617 vec_vals = _mm256_loadu_pd(aval); 618 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 619 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 620 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 621 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 622 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 623 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 624 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 625 aval += 4; 626 627 vec_vals = _mm256_loadu_pd(aval); 628 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 629 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 630 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 631 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 632 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 633 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 634 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 635 aval += 4; 636 } 637 638 _mm256_storeu_pd(z+i*8,vec_y); 639 _mm256_storeu_pd(z+i*8+4,vec_y2); 640 } 641 #else 642 for (i=0; i<totalslices; i++) { /* loop over slices */ 643 for (j=0; j<8; j++) sum[j] = 0.0; 644 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 645 sum[0] += aval[j] * x[acolidx[j]]; 646 sum[1] += aval[j+1] * x[acolidx[j+1]]; 647 sum[2] += aval[j+2] * x[acolidx[j+2]]; 648 sum[3] += aval[j+3] * x[acolidx[j+3]]; 649 sum[4] += aval[j+4] * x[acolidx[j+4]]; 650 sum[5] += aval[j+5] * x[acolidx[j+5]]; 651 sum[6] += aval[j+6] * x[acolidx[j+6]]; 652 sum[7] += aval[j+7] * x[acolidx[j+7]]; 653 } 654 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 655 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 656 } else { 657 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 658 } 659 } 660 #endif 661 662 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 663 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 664 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 669 { 670 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 671 PetscScalar *y; 672 const PetscScalar *x; 673 const MatScalar *aval=a->val; 674 const PetscInt *acolidx=a->colidx; 675 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 676 PetscErrorCode ierr; 677 678 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 679 #pragma disjoint(*x,*y,*aval) 680 #endif 681 682 PetscFunctionBegin; 683 if (A->symmetric) { 684 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 688 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 689 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 690 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 691 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 692 for (r=0; r<(A->rmap->n & 0x07); ++r) { 693 row = 8*i + r; 694 nnz_in_row = a->rlen[row]; 695 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 696 } 697 break; 698 } 699 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 700 y[acolidx[j]] += aval[j] * x[8*i]; 701 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 702 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 703 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 704 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 705 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 706 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 707 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 708 } 709 } 710 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 711 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 712 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 717 { 718 PetscErrorCode ierr; 719 720 PetscFunctionBegin; 721 if (A->symmetric) { 722 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 723 } else { 724 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 725 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 /* 731 Checks for missing diagonals 732 */ 733 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 734 { 735 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 736 PetscInt *diag,i; 737 738 PetscFunctionBegin; 739 *missing = PETSC_FALSE; 740 if (A->rmap->n > 0 && !(a->colidx)) { 741 *missing = PETSC_TRUE; 742 if (d) *d = 0; 743 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 744 } else { 745 diag = a->diag; 746 for (i=0; i<A->rmap->n; i++) { 747 if (diag[i] == -1) { 748 *missing = PETSC_TRUE; 749 if (d) *d = i; 750 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 751 break; 752 } 753 } 754 } 755 PetscFunctionReturn(0); 756 } 757 758 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 759 { 760 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 761 PetscInt i,j,m=A->rmap->n,shift; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if (!a->diag) { 766 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 767 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 768 a->free_diag = PETSC_TRUE; 769 } 770 for (i=0; i<m; i++) { /* loop over rows */ 771 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 772 a->diag[i] = -1; 773 for (j=0; j<a->rlen[i]; j++) { 774 if (a->colidx[shift+j*8] == i) { 775 a->diag[i] = shift+j*8; 776 break; 777 } 778 } 779 } 780 PetscFunctionReturn(0); 781 } 782 783 /* 784 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 785 */ 786 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 787 { 788 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 789 PetscInt i,*diag,m = A->rmap->n; 790 MatScalar *val = a->val; 791 PetscScalar *idiag,*mdiag; 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 if (a->idiagvalid) PetscFunctionReturn(0); 796 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 797 diag = a->diag; 798 if (!a->idiag) { 799 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 800 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 801 val = a->val; 802 } 803 mdiag = a->mdiag; 804 idiag = a->idiag; 805 806 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 807 for (i=0; i<m; i++) { 808 mdiag[i] = val[diag[i]]; 809 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 810 if (PetscRealPart(fshift)) { 811 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 812 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 813 A->factorerror_zeropivot_value = 0.0; 814 A->factorerror_zeropivot_row = i; 815 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 816 } 817 idiag[i] = 1.0/val[diag[i]]; 818 } 819 ierr = PetscLogFlops(m);CHKERRQ(ierr); 820 } else { 821 for (i=0; i<m; i++) { 822 mdiag[i] = val[diag[i]]; 823 idiag[i] = omega/(fshift + val[diag[i]]); 824 } 825 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 826 } 827 a->idiagvalid = PETSC_TRUE; 828 PetscFunctionReturn(0); 829 } 830 831 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 832 { 833 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 838 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 PetscErrorCode MatDestroy_SeqSELL(Mat A) 843 { 844 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 845 PetscErrorCode ierr; 846 847 PetscFunctionBegin; 848 #if defined(PETSC_USE_LOG) 849 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 850 #endif 851 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 852 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 853 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 854 ierr = PetscFree(a->diag);CHKERRQ(ierr); 855 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 856 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 857 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 858 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 859 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 860 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 861 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 862 863 ierr = PetscFree(A->data);CHKERRQ(ierr); 864 865 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 866 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 868 #if defined(PETSC_HAVE_ELEMENTAL) 869 #endif 870 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 875 { 876 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 877 PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 switch (op) { 881 case MAT_ROW_ORIENTED: 882 a->roworiented = flg; 883 break; 884 case MAT_KEEP_NONZERO_PATTERN: 885 a->keepnonzeropattern = flg; 886 break; 887 case MAT_NEW_NONZERO_LOCATIONS: 888 a->nonew = (flg ? 0 : 1); 889 break; 890 case MAT_NEW_NONZERO_LOCATION_ERR: 891 a->nonew = (flg ? -1 : 0); 892 break; 893 case MAT_NEW_NONZERO_ALLOCATION_ERR: 894 a->nonew = (flg ? -2 : 0); 895 break; 896 case MAT_UNUSED_NONZERO_LOCATION_ERR: 897 a->nounused = (flg ? -1 : 0); 898 break; 899 case MAT_NEW_DIAGONALS: 900 case MAT_IGNORE_OFF_PROC_ENTRIES: 901 case MAT_USE_HASH_TABLE: 902 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 903 break; 904 case MAT_SPD: 905 case MAT_SYMMETRIC: 906 case MAT_STRUCTURALLY_SYMMETRIC: 907 case MAT_HERMITIAN: 908 case MAT_SYMMETRY_ETERNAL: 909 /* These options are handled directly by MatSetOption() */ 910 break; 911 default: 912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 913 } 914 PetscFunctionReturn(0); 915 } 916 917 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 918 { 919 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 920 PetscInt i,j,n,shift; 921 PetscScalar *x,zero=0.0; 922 PetscErrorCode ierr; 923 924 PetscFunctionBegin; 925 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 926 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 927 928 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 929 PetscInt *diag=a->diag; 930 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 931 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 932 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 ierr = VecSet(v,zero);CHKERRQ(ierr); 937 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 938 for (i=0; i<n; i++) { /* loop over rows */ 939 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 940 x[i] = 0; 941 for (j=0; j<a->rlen[i]; j++) { 942 if (a->colidx[shift+j*8] == i) { 943 x[i] = a->val[shift+j*8]; 944 break; 945 } 946 } 947 } 948 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 953 { 954 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 955 const PetscScalar *l,*r; 956 PetscInt i,j,m,n,row; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 if (ll) { 961 /* The local size is used so that VecMPI can be passed to this routine 962 by MatDiagonalScale_MPISELL */ 963 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 964 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 965 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 966 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 967 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 968 a->val[j] *= l[8*i+row]; 969 } 970 } 971 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 972 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 973 } 974 if (rr) { 975 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 976 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 977 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 978 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 979 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 980 a->val[j] *= r[a->colidx[j]]; 981 } 982 } 983 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 984 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 985 } 986 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 991 992 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 993 { 994 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 995 PetscInt *cp,i,k,low,high,t,row,col,l; 996 PetscInt shift; 997 MatScalar *vp; 998 999 PetscFunctionBegin; 1000 for (k=0; k<m; k++) { /* loop over requested rows */ 1001 row = im[k]; 1002 if (row<0) continue; 1003 #if defined(PETSC_USE_DEBUG) 1004 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1005 #endif 1006 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1007 cp = a->colidx+shift; /* pointer to the row */ 1008 vp = a->val+shift; /* pointer to the row */ 1009 for (l=0; l<n; l++) { /* loop over requested columns */ 1010 col = in[l]; 1011 if (col<0) continue; 1012 #if defined(PETSC_USE_DEBUG) 1013 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1014 #endif 1015 high = a->rlen[row]; low = 0; /* assume unsorted */ 1016 while (high-low > 5) { 1017 t = (low+high)/2; 1018 if (*(cp+t*8) > col) high = t; 1019 else low = t; 1020 } 1021 for (i=low; i<high; i++) { 1022 if (*(cp+8*i) > col) break; 1023 if (*(cp+8*i) == col) { 1024 *v++ = *(vp+8*i); 1025 goto finished; 1026 } 1027 } 1028 *v++ = 0.0; 1029 finished:; 1030 } 1031 } 1032 PetscFunctionReturn(0); 1033 } 1034 1035 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1036 { 1037 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1038 PetscInt i,j,m=A->rmap->n,shift; 1039 const char *name; 1040 PetscViewerFormat format; 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1045 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1046 PetscInt nofinalvalue = 0; 1047 /* 1048 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1049 nofinalvalue = 1; 1050 } 1051 */ 1052 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1053 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1055 #if defined(PETSC_USE_COMPLEX) 1056 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1057 #else 1058 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1059 #endif 1060 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1061 1062 for (i=0; i<m; i++) { 1063 shift = a->sliidx[i>>3]+(i&0x07); 1064 for (j=0; j<a->rlen[i]; j++) { 1065 #if defined(PETSC_USE_COMPLEX) 1066 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1067 #else 1068 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1069 #endif 1070 } 1071 } 1072 /* 1073 if (nofinalvalue) { 1074 #if defined(PETSC_USE_COMPLEX) 1075 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1076 #else 1077 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1078 #endif 1079 } 1080 */ 1081 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1083 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1084 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1085 PetscFunctionReturn(0); 1086 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1087 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1088 for (i=0; i<m; i++) { 1089 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1090 shift = a->sliidx[i>>3]+(i&0x07); 1091 for (j=0; j<a->rlen[i]; j++) { 1092 #if defined(PETSC_USE_COMPLEX) 1093 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1094 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1095 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1096 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1097 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1098 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1099 } 1100 #else 1101 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1102 #endif 1103 } 1104 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1105 } 1106 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1107 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1108 PetscInt cnt=0,jcnt; 1109 PetscScalar value; 1110 #if defined(PETSC_USE_COMPLEX) 1111 PetscBool realonly=PETSC_TRUE; 1112 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1113 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1114 realonly = PETSC_FALSE; 1115 break; 1116 } 1117 } 1118 #endif 1119 1120 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1121 for (i=0; i<m; i++) { 1122 jcnt = 0; 1123 shift = a->sliidx[i>>3]+(i&0x07); 1124 for (j=0; j<A->cmap->n; j++) { 1125 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1126 value = a->val[cnt++]; 1127 jcnt++; 1128 } else { 1129 value = 0.0; 1130 } 1131 #if defined(PETSC_USE_COMPLEX) 1132 if (realonly) { 1133 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1134 } else { 1135 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1136 } 1137 #else 1138 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1139 #endif 1140 } 1141 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1142 } 1143 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1144 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1145 PetscInt fshift=1; 1146 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1147 #if defined(PETSC_USE_COMPLEX) 1148 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1149 #else 1150 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1151 #endif 1152 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1153 for (i=0; i<m; i++) { 1154 shift = a->sliidx[i>>3]+(i&0x07); 1155 for (j=0; j<a->rlen[i]; j++) { 1156 #if defined(PETSC_USE_COMPLEX) 1157 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1158 #else 1159 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1160 #endif 1161 } 1162 } 1163 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1164 } else if (format == PETSC_VIEWER_NATIVE) { 1165 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1166 PetscInt row; 1167 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1168 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1169 #if defined(PETSC_USE_COMPLEX) 1170 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1171 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1172 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1173 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1174 } else { 1175 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1176 } 1177 #else 1178 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1179 #endif 1180 } 1181 } 1182 } else { 1183 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1184 if (A->factortype) { 1185 for (i=0; i<m; i++) { 1186 shift = a->sliidx[i>>3]+(i&0x07); 1187 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1188 /* L part */ 1189 for (j=shift; j<a->diag[i]; j+=8) { 1190 #if defined(PETSC_USE_COMPLEX) 1191 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1192 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1193 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1194 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1195 } else { 1196 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1197 } 1198 #else 1199 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1200 #endif 1201 } 1202 /* diagonal */ 1203 j = a->diag[i]; 1204 #if defined(PETSC_USE_COMPLEX) 1205 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1206 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1207 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1208 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1209 } else { 1210 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1211 } 1212 #else 1213 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1214 #endif 1215 1216 /* U part */ 1217 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1218 #if defined(PETSC_USE_COMPLEX) 1219 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1220 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1221 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1222 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1223 } else { 1224 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1225 } 1226 #else 1227 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1228 #endif 1229 } 1230 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1231 } 1232 } else { 1233 for (i=0; i<m; i++) { 1234 shift = a->sliidx[i>>3]+(i&0x07); 1235 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1236 for (j=0; j<a->rlen[i]; j++) { 1237 #if defined(PETSC_USE_COMPLEX) 1238 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1239 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1240 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1241 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1244 } 1245 #else 1246 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1247 #endif 1248 } 1249 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1250 } 1251 } 1252 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1253 } 1254 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #include <petscdraw.h> 1259 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1260 { 1261 Mat A=(Mat)Aa; 1262 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1263 PetscInt i,j,m=A->rmap->n,shift; 1264 int color; 1265 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1266 PetscViewer viewer; 1267 PetscViewerFormat format; 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1272 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1273 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1274 1275 /* loop over matrix elements drawing boxes */ 1276 1277 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1278 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1279 /* Blue for negative, Cyan for zero and Red for positive */ 1280 color = PETSC_DRAW_BLUE; 1281 for (i=0; i<m; i++) { 1282 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1283 y_l = m - i - 1.0; y_r = y_l + 1.0; 1284 for (j=0; j<a->rlen[i]; j++) { 1285 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1286 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1287 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1288 } 1289 } 1290 color = PETSC_DRAW_CYAN; 1291 for (i=0; i<m; i++) { 1292 shift = a->sliidx[i>>3]+(i&0x07); 1293 y_l = m - i - 1.0; y_r = y_l + 1.0; 1294 for (j=0; j<a->rlen[i]; j++) { 1295 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1296 if (a->val[shift+8*j] != 0.) continue; 1297 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1298 } 1299 } 1300 color = PETSC_DRAW_RED; 1301 for (i=0; i<m; i++) { 1302 shift = a->sliidx[i>>3]+(i&0x07); 1303 y_l = m - i - 1.0; y_r = y_l + 1.0; 1304 for (j=0; j<a->rlen[i]; j++) { 1305 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1306 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1307 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1308 } 1309 } 1310 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1311 } else { 1312 /* use contour shading to indicate magnitude of values */ 1313 /* first determine max of all nonzero values */ 1314 PetscReal minv=0.0,maxv=0.0; 1315 PetscInt count=0; 1316 PetscDraw popup; 1317 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1318 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1319 } 1320 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1321 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1322 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1323 1324 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1325 for (i=0; i<m; i++) { 1326 shift = a->sliidx[i>>3]+(i&0x07); 1327 y_l = m - i - 1.0; 1328 y_r = y_l + 1.0; 1329 for (j=0; j<a->rlen[i]; j++) { 1330 x_l = a->colidx[shift+j*8]; 1331 x_r = x_l + 1.0; 1332 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1333 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1334 count++; 1335 } 1336 } 1337 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1338 } 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #include <petscdraw.h> 1343 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1344 { 1345 PetscDraw draw; 1346 PetscReal xr,yr,xl,yl,h,w; 1347 PetscBool isnull; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1352 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1353 if (isnull) PetscFunctionReturn(0); 1354 1355 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1356 xr += w; yr += h; xl = -w; yl = -h; 1357 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1358 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1359 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1360 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1361 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1366 { 1367 PetscBool iascii,isbinary,isdraw; 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1372 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1373 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1374 if (iascii) { 1375 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1376 } else if (isbinary) { 1377 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1378 } else if (isdraw) { 1379 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1385 { 1386 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1387 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1388 MatScalar *vp; 1389 PetscErrorCode ierr; 1390 1391 PetscFunctionBegin; 1392 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1393 /* To do: compress out the unused elements */ 1394 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1395 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1396 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1397 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1398 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1399 for (i=0; i<a->totalslices; ++i) { 1400 shift = a->sliidx[i]; /* starting index of the slice */ 1401 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1402 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1403 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1404 row = 8*i + row_in_slice; 1405 nrow = a->rlen[row]; /* number of nonzeros in row */ 1406 /* 1407 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1408 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1409 */ 1410 lastcol = 0; 1411 if (nrow>0) { /* nonempty row */ 1412 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1413 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1414 for (j=1;j<8;j++) { 1415 if (a->rlen[8*i+j]) { 1416 lastcol = cp[j]; 1417 break; 1418 } 1419 } 1420 } else { 1421 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1422 } 1423 1424 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1425 cp[8*k+row_in_slice] = lastcol; 1426 vp[8*k+row_in_slice] = (MatScalar)0; 1427 } 1428 } 1429 } 1430 1431 A->info.mallocs += a->reallocs; 1432 a->reallocs = 0; 1433 1434 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1439 { 1440 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1441 1442 PetscFunctionBegin; 1443 info->block_size = 1.0; 1444 info->nz_allocated = (double)a->maxallocmat; 1445 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1446 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1447 info->assemblies = (double)A->num_ass; 1448 info->mallocs = (double)A->info.mallocs; 1449 info->memory = ((PetscObject)A)->mem; 1450 if (A->factortype) { 1451 info->fill_ratio_given = A->info.fill_ratio_given; 1452 info->fill_ratio_needed = A->info.fill_ratio_needed; 1453 info->factor_mallocs = A->info.factor_mallocs; 1454 } else { 1455 info->fill_ratio_given = 0; 1456 info->fill_ratio_needed = 0; 1457 info->factor_mallocs = 0; 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1463 { 1464 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1465 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1466 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1467 MatScalar *vp,value; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 for (k=0; k<m; k++) { /* loop over added rows */ 1472 row = im[k]; 1473 if (row < 0) continue; 1474 #if defined(PETSC_USE_DEBUG) 1475 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1476 #endif 1477 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1478 cp = a->colidx+shift; /* pointer to the row */ 1479 vp = a->val+shift; /* pointer to the row */ 1480 nrow = a->rlen[row]; 1481 low = 0; 1482 high = nrow; 1483 1484 for (l=0; l<n; l++) { /* loop over added columns */ 1485 col = in[l]; 1486 if (col<0) continue; 1487 #if defined(PETSC_USE_DEBUG) 1488 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1489 #endif 1490 if (a->roworiented) { 1491 value = v[l+k*n]; 1492 } else { 1493 value = v[k+l*m]; 1494 } 1495 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1496 1497 /* search in this row for the specified colmun, i indicates the column to be set */ 1498 if (col <= lastcol) low = 0; 1499 else high = nrow; 1500 lastcol = col; 1501 while (high-low > 5) { 1502 t = (low+high)/2; 1503 if (*(cp+t*8) > col) high = t; 1504 else low = t; 1505 } 1506 for (i=low; i<high; i++) { 1507 if (*(cp+i*8) > col) break; 1508 if (*(cp+i*8) == col) { 1509 if (is == ADD_VALUES) *(vp+i*8) += value; 1510 else *(vp+i*8) = value; 1511 low = i + 1; 1512 goto noinsert; 1513 } 1514 } 1515 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1516 if (nonew == 1) goto noinsert; 1517 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1518 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1519 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1520 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1521 for (ii=nrow-1; ii>=i; ii--) { 1522 *(cp+(ii+1)*8) = *(cp+ii*8); 1523 *(vp+(ii+1)*8) = *(vp+ii*8); 1524 } 1525 a->rlen[row]++; 1526 *(cp+i*8) = col; 1527 *(vp+i*8) = value; 1528 a->nz++; 1529 A->nonzerostate++; 1530 low = i+1; high++; nrow++; 1531 noinsert:; 1532 } 1533 a->rlen[row] = nrow; 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1539 { 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 /* If the two matrices have the same copy implementation, use fast copy. */ 1544 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1545 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1546 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1547 1548 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1549 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1550 } else { 1551 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555 1556 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1557 { 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 1565 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1566 { 1567 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1568 1569 PetscFunctionBegin; 1570 *array = a->val; 1571 PetscFunctionReturn(0); 1572 } 1573 1574 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1575 { 1576 PetscFunctionBegin; 1577 PetscFunctionReturn(0); 1578 } 1579 1580 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1581 { 1582 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1583 PetscInt i; 1584 MatScalar *aval=a->val; 1585 1586 PetscFunctionBegin; 1587 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1592 { 1593 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1594 PetscInt i; 1595 MatScalar *aval=a->val; 1596 PetscErrorCode ierr; 1597 1598 PetscFunctionBegin; 1599 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1600 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1605 { 1606 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1607 MatScalar *aval=a->val; 1608 PetscScalar oalpha=alpha; 1609 PetscBLASInt one=1,size; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1614 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1615 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1616 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1621 { 1622 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 if (!Y->preallocated || !y->nz) { 1627 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1628 } 1629 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1630 PetscFunctionReturn(0); 1631 } 1632 1633 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1634 { 1635 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1636 PetscScalar *x,sum,*t; 1637 const MatScalar *idiag=0,*mdiag; 1638 const PetscScalar *b,*xb; 1639 PetscInt n,m=A->rmap->n,i,j,shift; 1640 const PetscInt *diag; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 its = its*lits; 1645 1646 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1647 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1648 a->fshift = fshift; 1649 a->omega = omega; 1650 1651 diag = a->diag; 1652 t = a->ssor_work; 1653 idiag = a->idiag; 1654 mdiag = a->mdiag; 1655 1656 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1657 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1658 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1659 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1660 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1661 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1662 1663 if (flag & SOR_ZERO_INITIAL_GUESS) { 1664 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1665 for (i=0; i<m; i++) { 1666 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1667 sum = b[i]; 1668 n = (diag[i]-shift)/8; 1669 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1670 t[i] = sum; 1671 x[i] = sum*idiag[i]; 1672 } 1673 xb = t; 1674 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1675 } else xb = b; 1676 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1677 for (i=m-1; i>=0; i--) { 1678 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1679 sum = xb[i]; 1680 n = a->rlen[i]-(diag[i]-shift)/8-1; 1681 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1682 if (xb == b) { 1683 x[i] = sum*idiag[i]; 1684 } else { 1685 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1686 } 1687 } 1688 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1689 } 1690 its--; 1691 } 1692 while (its--) { 1693 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1694 for (i=0; i<m; i++) { 1695 /* lower */ 1696 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1697 sum = b[i]; 1698 n = (diag[i]-shift)/8; 1699 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1700 t[i] = sum; /* save application of the lower-triangular part */ 1701 /* upper */ 1702 n = a->rlen[i]-(diag[i]-shift)/8-1; 1703 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1704 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1705 } 1706 xb = t; 1707 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1708 } else xb = b; 1709 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1710 for (i=m-1; i>=0; i--) { 1711 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1712 sum = xb[i]; 1713 if (xb == b) { 1714 /* whole matrix (no checkpointing available) */ 1715 n = a->rlen[i]; 1716 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1717 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1718 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1719 n = a->rlen[i]-(diag[i]-shift)/8-1; 1720 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1721 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1722 } 1723 } 1724 if (xb == b) { 1725 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1726 } else { 1727 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1728 } 1729 } 1730 } 1731 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1732 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 /* -------------------------------------------------------------------*/ 1737 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1738 MatGetRow_SeqSELL, 1739 MatRestoreRow_SeqSELL, 1740 MatMult_SeqSELL, 1741 /* 4*/ MatMultAdd_SeqSELL, 1742 MatMultTranspose_SeqSELL, 1743 MatMultTransposeAdd_SeqSELL, 1744 0, 1745 0, 1746 0, 1747 /* 10*/ 0, 1748 0, 1749 0, 1750 MatSOR_SeqSELL, 1751 0, 1752 /* 15*/ MatGetInfo_SeqSELL, 1753 MatEqual_SeqSELL, 1754 MatGetDiagonal_SeqSELL, 1755 MatDiagonalScale_SeqSELL, 1756 0, 1757 /* 20*/ 0, 1758 MatAssemblyEnd_SeqSELL, 1759 MatSetOption_SeqSELL, 1760 MatZeroEntries_SeqSELL, 1761 /* 24*/ 0, 1762 0, 1763 0, 1764 0, 1765 0, 1766 /* 29*/ MatSetUp_SeqSELL, 1767 0, 1768 0, 1769 0, 1770 0, 1771 /* 34*/ MatDuplicate_SeqSELL, 1772 0, 1773 0, 1774 0, 1775 0, 1776 /* 39*/ 0, 1777 0, 1778 0, 1779 MatGetValues_SeqSELL, 1780 MatCopy_SeqSELL, 1781 /* 44*/ 0, 1782 MatScale_SeqSELL, 1783 MatShift_SeqSELL, 1784 0, 1785 0, 1786 /* 49*/ 0, 1787 0, 1788 0, 1789 0, 1790 0, 1791 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1792 0, 1793 0, 1794 0, 1795 0, 1796 /* 59*/ 0, 1797 MatDestroy_SeqSELL, 1798 MatView_SeqSELL, 1799 0, 1800 0, 1801 /* 64*/ 0, 1802 0, 1803 0, 1804 0, 1805 0, 1806 /* 69*/ 0, 1807 0, 1808 0, 1809 0, 1810 0, 1811 /* 74*/ 0, 1812 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1813 0, 1814 0, 1815 0, 1816 /* 79*/ 0, 1817 0, 1818 0, 1819 0, 1820 0, 1821 /* 84*/ 0, 1822 0, 1823 0, 1824 0, 1825 0, 1826 /* 89*/ 0, 1827 0, 1828 0, 1829 0, 1830 0, 1831 /* 94*/ 0, 1832 0, 1833 0, 1834 0, 1835 0, 1836 /* 99*/ 0, 1837 0, 1838 0, 1839 MatConjugate_SeqSELL, 1840 0, 1841 /*104*/ 0, 1842 0, 1843 0, 1844 0, 1845 0, 1846 /*109*/ 0, 1847 0, 1848 0, 1849 0, 1850 MatMissingDiagonal_SeqSELL, 1851 /*114*/ 0, 1852 0, 1853 0, 1854 0, 1855 0, 1856 /*119*/ 0, 1857 0, 1858 0, 1859 0, 1860 0, 1861 /*124*/ 0, 1862 0, 1863 0, 1864 0, 1865 0, 1866 /*129*/ 0, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /*134*/ 0, 1872 0, 1873 0, 1874 0, 1875 0, 1876 /*139*/ 0, 1877 0, 1878 0, 1879 MatFDColoringSetUp_SeqXAIJ, 1880 0, 1881 /*144*/0 1882 }; 1883 1884 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1885 { 1886 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1891 1892 /* allocate space for values if not already there */ 1893 if (!a->saved_values) { 1894 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1895 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1896 } 1897 1898 /* copy values over */ 1899 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1904 { 1905 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1910 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1911 /* copy values over */ 1912 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 /*@C 1917 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1918 1919 Not Collective 1920 1921 Input Parameters: 1922 . mat - a MATSEQSELL matrix 1923 . array - pointer to the data 1924 1925 Level: intermediate 1926 1927 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1928 @*/ 1929 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1930 { 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1939 { 1940 Mat_SeqSELL *b; 1941 PetscMPIInt size; 1942 PetscErrorCode ierr; 1943 1944 PetscFunctionBegin; 1945 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1946 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1947 1948 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1949 1950 B->data = (void*)b; 1951 1952 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1953 1954 b->row = 0; 1955 b->col = 0; 1956 b->icol = 0; 1957 b->reallocs = 0; 1958 b->ignorezeroentries = PETSC_FALSE; 1959 b->roworiented = PETSC_TRUE; 1960 b->nonew = 0; 1961 b->diag = 0; 1962 b->solve_work = 0; 1963 B->spptr = 0; 1964 b->saved_values = 0; 1965 b->idiag = 0; 1966 b->mdiag = 0; 1967 b->ssor_work = 0; 1968 b->omega = 1.0; 1969 b->fshift = 0.0; 1970 b->idiagvalid = PETSC_FALSE; 1971 b->keepnonzeropattern = PETSC_FALSE; 1972 1973 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1978 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1979 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1980 PetscFunctionReturn(0); 1981 } 1982 1983 /* 1984 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1985 */ 1986 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1987 { 1988 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1989 PetscInt i,m=A->rmap->n; 1990 PetscInt totalslices=a->totalslices; 1991 PetscErrorCode ierr; 1992 1993 PetscFunctionBegin; 1994 c = (Mat_SeqSELL*)C->data; 1995 1996 C->factortype = A->factortype; 1997 c->row = 0; 1998 c->col = 0; 1999 c->icol = 0; 2000 c->reallocs = 0; 2001 2002 C->assembled = PETSC_TRUE; 2003 2004 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2005 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2006 2007 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2008 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2009 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2010 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2011 2012 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2013 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2014 2015 /* allocate the matrix space */ 2016 if (mallocmatspace) { 2017 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2018 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2019 2020 c->singlemalloc = PETSC_TRUE; 2021 2022 if (m > 0) { 2023 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2024 if (cpvalues == MAT_COPY_VALUES) { 2025 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2026 } else { 2027 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2028 } 2029 } 2030 } 2031 2032 c->ignorezeroentries = a->ignorezeroentries; 2033 c->roworiented = a->roworiented; 2034 c->nonew = a->nonew; 2035 if (a->diag) { 2036 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2037 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2038 for (i=0; i<m; i++) { 2039 c->diag[i] = a->diag[i]; 2040 } 2041 } else c->diag = 0; 2042 2043 c->solve_work = 0; 2044 c->saved_values = 0; 2045 c->idiag = 0; 2046 c->ssor_work = 0; 2047 c->keepnonzeropattern = a->keepnonzeropattern; 2048 c->free_val = PETSC_TRUE; 2049 c->free_colidx = PETSC_TRUE; 2050 2051 c->maxallocmat = a->maxallocmat; 2052 c->maxallocrow = a->maxallocrow; 2053 c->rlenmax = a->rlenmax; 2054 c->nz = a->nz; 2055 C->preallocated = PETSC_TRUE; 2056 2057 c->nonzerorowcnt = a->nonzerorowcnt; 2058 C->nonzerostate = A->nonzerostate; 2059 2060 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2061 PetscFunctionReturn(0); 2062 } 2063 2064 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2065 { 2066 PetscErrorCode ierr; 2067 2068 PetscFunctionBegin; 2069 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2070 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2071 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2072 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2073 } 2074 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2075 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 /*@C 2080 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2081 2082 Collective on MPI_Comm 2083 2084 Input Parameters: 2085 + comm - MPI communicator, set to PETSC_COMM_SELF 2086 . m - number of rows 2087 . n - number of columns 2088 . rlenmax - maximum number of nonzeros in a row 2089 - rlen - array containing the number of nonzeros in the various rows 2090 (possibly different for each row) or NULL 2091 2092 Output Parameter: 2093 . A - the matrix 2094 2095 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2096 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2097 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2098 2099 Notes: 2100 If nnz is given then nz is ignored 2101 2102 Specify the preallocated storage with either rlenmax or rlen (not both). 2103 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2104 allocation. For large problems you MUST preallocate memory or you 2105 will get TERRIBLE performance, see the users' manual chapter on matrices. 2106 2107 Level: intermediate 2108 2109 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2110 2111 @*/ 2112 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2113 { 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2118 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2119 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2120 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2125 { 2126 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2127 PetscInt totalslices=a->totalslices; 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 /* If the matrix dimensions are not equal,or no of nonzeros */ 2132 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2133 *flg = PETSC_FALSE; 2134 PetscFunctionReturn(0); 2135 } 2136 /* if the a->colidx are the same */ 2137 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2138 if (!*flg) PetscFunctionReturn(0); 2139 /* if a->val are the same */ 2140 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2145 { 2146 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2147 2148 PetscFunctionBegin; 2149 a->idiagvalid = PETSC_FALSE; 2150 PetscFunctionReturn(0); 2151 } 2152 2153 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2154 { 2155 #if defined(PETSC_USE_COMPLEX) 2156 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2157 PetscInt i; 2158 PetscScalar *val = a->val; 2159 2160 PetscFunctionBegin; 2161 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2162 val[i] = PetscConj(val[i]); 2163 } 2164 #else 2165 PetscFunctionBegin; 2166 #endif 2167 PetscFunctionReturn(0); 2168 } 2169