1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 9 10 #include <immintrin.h> 11 12 #if !defined(_MM_SCALE_8) 13 #define _MM_SCALE_8 8 14 #endif 15 16 #if defined(__AVX512F__) 17 /* these do not work 18 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 19 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 20 */ 21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 22 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 23 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 24 vec_vals = _mm512_loadu_pd(aval); \ 25 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 26 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 27 #elif defined(__AVX2__) && defined(__FMA__) 28 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 29 vec_vals = _mm256_loadu_pd(aval); \ 30 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 31 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 32 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 33 #endif 34 #endif /* PETSC_HAVE_IMMINTRIN_H */ 35 36 /*@C 37 MatSeqSELLSetPreallocation - For good matrix assembly performance 38 the user should preallocate the matrix storage by setting the parameter nz 39 (or the array nnz). By setting these parameters accurately, performance 40 during matrix assembly can be increased significantly. 41 42 Collective 43 44 Input Parameters: 45 + B - The matrix 46 . nz - number of nonzeros per row (same for all rows) 47 - nnz - array containing the number of nonzeros in the various rows 48 (possibly different for each row) or NULL 49 50 Notes: 51 If nnz is given then nz is ignored. 52 53 Specify the preallocated storage with either nz or nnz (not both). 54 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 55 allocation. For large problems you MUST preallocate memory or you 56 will get TERRIBLE performance, see the users' manual chapter on matrices. 57 58 You can call MatGetInfo() to get information on how effective the preallocation was; 59 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 60 You can also run with the option -info and look for messages with the string 61 malloc in them to see if additional memory allocation was needed. 62 63 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 64 entries or columns indices. 65 66 The maximum number of nonzeos in any row should be as accurate as possible. 67 If it is underestimated, you will get bad performance due to reallocation 68 (MatSeqXSELLReallocateSELL). 69 70 Level: intermediate 71 72 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 73 74 @*/ 75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 81 PetscValidType(B,1); 82 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 87 { 88 Mat_SeqSELL *b; 89 PetscInt i,j,totalslices; 90 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 95 if (maxallocrow == MAT_SKIP_ALLOCATION) { 96 skipallocation = PETSC_TRUE; 97 maxallocrow = 0; 98 } 99 100 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 101 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 102 103 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 104 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 105 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 106 if (rlen) { 107 for (i=0; i<B->rmap->n; i++) { 108 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 109 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 110 } 111 } 112 113 B->preallocated = PETSC_TRUE; 114 115 b = (Mat_SeqSELL*)B->data; 116 117 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 118 b->totalslices = totalslices; 119 if (!skipallocation) { 120 if (B->rmap->n & 0x07) {ierr = PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);CHKERRQ(ierr);} 121 122 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 123 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 125 } 126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 128 else if (maxallocrow < 0) maxallocrow = 1; 129 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 130 } else { 131 maxallocrow = 0; 132 b->sliidx[0] = 0; 133 for (i=1; i<totalslices; i++) { 134 b->sliidx[i] = 0; 135 for (j=0;j<8;j++) { 136 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 137 } 138 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 139 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 140 } 141 /* last slice */ 142 b->sliidx[totalslices] = 0; 143 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 144 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 145 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 146 } 147 148 /* allocate space for val, colidx, rlen */ 149 /* FIXME: should B's old memory be unlogged? */ 150 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 151 /* FIXME: assuming an element of the bit array takes 8 bits */ 152 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 153 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 154 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 155 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 156 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 157 158 b->singlemalloc = PETSC_TRUE; 159 b->free_val = PETSC_TRUE; 160 b->free_colidx = PETSC_TRUE; 161 } else { 162 b->free_val = PETSC_FALSE; 163 b->free_colidx = PETSC_FALSE; 164 } 165 166 b->nz = 0; 167 b->maxallocrow = maxallocrow; 168 b->rlenmax = maxallocrow; 169 b->maxallocmat = b->sliidx[totalslices]; 170 B->info.nz_unneeded = (double)b->maxallocmat; 171 if (realalloc) { 172 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 178 { 179 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 180 PetscInt shift; 181 182 PetscFunctionBegin; 183 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 184 if (nz) *nz = a->rlen[row]; 185 shift = a->sliidx[row>>3]+(row&0x07); 186 if (!a->getrowcols) { 187 PetscErrorCode ierr; 188 189 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 190 } 191 if (idx) { 192 PetscInt j; 193 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 194 *idx = a->getrowcols; 195 } 196 if (v) { 197 PetscInt j; 198 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 199 *v = a->getrowvals; 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 205 { 206 PetscFunctionBegin; 207 PetscFunctionReturn(0); 208 } 209 210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 211 { 212 Mat B; 213 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 214 PetscInt i; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (reuse == MAT_REUSE_MATRIX) { 219 B = *newmat; 220 ierr = MatZeroEntries(B);CHKERRQ(ierr); 221 } else { 222 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 223 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 224 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 226 } 227 228 for (i=0; i<A->rmap->n; i++) { 229 PetscInt nz = 0,*cols = NULL; 230 PetscScalar *vals = NULL; 231 232 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 233 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 234 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 235 } 236 237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 B->rmap->bs = A->rmap->bs; 240 241 if (reuse == MAT_INPLACE_MATRIX) { 242 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 243 } else { 244 *newmat = B; 245 } 246 PetscFunctionReturn(0); 247 } 248 249 #include <../src/mat/impls/aij/seq/aij.h> 250 251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 252 { 253 Mat B; 254 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 255 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 256 const PetscInt *cols; 257 const PetscScalar *vals; 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 262 if (reuse == MAT_REUSE_MATRIX) { 263 B = *newmat; 264 } else { 265 /* Can we just use ilen? */ 266 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 267 for (i=0; i<m; i++) { 268 rowlengths[i] = ai[i+1] - ai[i]; 269 } 270 271 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 272 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 273 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 274 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 275 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 276 } 277 278 for (row=0; row<m; row++) { 279 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 280 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 281 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 282 } 283 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 284 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285 B->rmap->bs = A->rmap->bs; 286 287 if (reuse == MAT_INPLACE_MATRIX) { 288 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 289 } else { 290 *newmat = B; 291 } 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 296 { 297 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 298 PetscScalar *y; 299 const PetscScalar *x; 300 const MatScalar *aval=a->val; 301 PetscInt totalslices=a->totalslices; 302 const PetscInt *acolidx=a->colidx; 303 PetscInt i,j; 304 PetscErrorCode ierr; 305 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 306 __m512d vec_x,vec_y,vec_vals; 307 __m256i vec_idx; 308 __mmask8 mask; 309 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 310 __m256i vec_idx2,vec_idx3,vec_idx4; 311 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 312 __m128i vec_idx; 313 __m256d vec_x,vec_y,vec_y2,vec_vals; 314 MatScalar yval; 315 PetscInt r,rows_left,row,nnz_in_row; 316 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 317 __m128d vec_x_tmp; 318 __m256d vec_x,vec_y,vec_y2,vec_vals; 319 MatScalar yval; 320 PetscInt r,rows_left,row,nnz_in_row; 321 #else 322 PetscScalar sum[8]; 323 #endif 324 325 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 326 #pragma disjoint(*x,*y,*aval) 327 #endif 328 329 PetscFunctionBegin; 330 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 331 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 332 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 333 for (i=0; i<totalslices; i++) { /* loop over slices */ 334 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 335 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 336 337 vec_y = _mm512_setzero_pd(); 338 vec_y2 = _mm512_setzero_pd(); 339 vec_y3 = _mm512_setzero_pd(); 340 vec_y4 = _mm512_setzero_pd(); 341 342 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 343 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 344 case 3: 345 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 346 acolidx += 8; aval += 8; 347 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 348 acolidx += 8; aval += 8; 349 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 350 acolidx += 8; aval += 8; 351 j += 3; 352 break; 353 case 2: 354 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 355 acolidx += 8; aval += 8; 356 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 357 acolidx += 8; aval += 8; 358 j += 2; 359 break; 360 case 1: 361 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 362 acolidx += 8; aval += 8; 363 j += 1; 364 break; 365 } 366 #pragma novector 367 for (; j<(a->sliidx[i+1]>>3); j+=4) { 368 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 369 acolidx += 8; aval += 8; 370 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 371 acolidx += 8; aval += 8; 372 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 373 acolidx += 8; aval += 8; 374 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 375 acolidx += 8; aval += 8; 376 } 377 378 vec_y = _mm512_add_pd(vec_y,vec_y2); 379 vec_y = _mm512_add_pd(vec_y,vec_y3); 380 vec_y = _mm512_add_pd(vec_y,vec_y4); 381 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 382 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 383 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 384 } else { 385 _mm512_storeu_pd(&y[8*i],vec_y); 386 } 387 } 388 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 389 for (i=0; i<totalslices; i++) { /* loop over full slices */ 390 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 391 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 392 393 /* last slice may have padding rows. Don't use vectorization. */ 394 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 395 rows_left = A->rmap->n - 8*i; 396 for (r=0; r<rows_left; ++r) { 397 yval = (MatScalar)0; 398 row = 8*i + r; 399 nnz_in_row = a->rlen[row]; 400 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 401 y[row] = yval; 402 } 403 break; 404 } 405 406 vec_y = _mm256_setzero_pd(); 407 vec_y2 = _mm256_setzero_pd(); 408 409 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 410 #pragma novector 411 #pragma unroll(2) 412 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 413 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 414 aval += 4; acolidx += 4; 415 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 416 aval += 4; acolidx += 4; 417 } 418 419 _mm256_storeu_pd(y+i*8,vec_y); 420 _mm256_storeu_pd(y+i*8+4,vec_y2); 421 } 422 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 423 for (i=0; i<totalslices; i++) { /* loop over full slices */ 424 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 425 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 426 427 vec_y = _mm256_setzero_pd(); 428 vec_y2 = _mm256_setzero_pd(); 429 430 /* last slice may have padding rows. Don't use vectorization. */ 431 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 432 rows_left = A->rmap->n - 8*i; 433 for (r=0; r<rows_left; ++r) { 434 yval = (MatScalar)0; 435 row = 8*i + r; 436 nnz_in_row = a->rlen[row]; 437 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 438 y[row] = yval; 439 } 440 break; 441 } 442 443 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 444 #pragma novector 445 #pragma unroll(2) 446 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 447 vec_vals = _mm256_loadu_pd(aval); 448 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 449 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 450 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 451 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 452 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 453 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 454 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 455 aval += 4; 456 457 vec_vals = _mm256_loadu_pd(aval); 458 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 459 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 460 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 461 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 462 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 463 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 464 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 465 aval += 4; 466 } 467 468 _mm256_storeu_pd(y + i*8, vec_y); 469 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 470 } 471 #else 472 for (i=0; i<totalslices; i++) { /* loop over slices */ 473 for (j=0; j<8; j++) sum[j] = 0.0; 474 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 475 sum[0] += aval[j] * x[acolidx[j]]; 476 sum[1] += aval[j+1] * x[acolidx[j+1]]; 477 sum[2] += aval[j+2] * x[acolidx[j+2]]; 478 sum[3] += aval[j+3] * x[acolidx[j+3]]; 479 sum[4] += aval[j+4] * x[acolidx[j+4]]; 480 sum[5] += aval[j+5] * x[acolidx[j+5]]; 481 sum[6] += aval[j+6] * x[acolidx[j+6]]; 482 sum[7] += aval[j+7] * x[acolidx[j+7]]; 483 } 484 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 485 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 486 } else { 487 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 488 } 489 } 490 #endif 491 492 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 493 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 499 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 500 { 501 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 502 PetscScalar *y,*z; 503 const PetscScalar *x; 504 const MatScalar *aval=a->val; 505 PetscInt totalslices=a->totalslices; 506 const PetscInt *acolidx=a->colidx; 507 PetscInt i,j; 508 PetscErrorCode ierr; 509 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 510 __m512d vec_x,vec_y,vec_vals; 511 __m256i vec_idx; 512 __mmask8 mask; 513 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 514 __m256i vec_idx2,vec_idx3,vec_idx4; 515 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 516 __m128d vec_x_tmp; 517 __m256d vec_x,vec_y,vec_y2,vec_vals; 518 MatScalar yval; 519 PetscInt r,row,nnz_in_row; 520 #else 521 PetscScalar sum[8]; 522 #endif 523 524 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 525 #pragma disjoint(*x,*y,*aval) 526 #endif 527 528 PetscFunctionBegin; 529 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 530 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 531 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 532 for (i=0; i<totalslices; i++) { /* loop over slices */ 533 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 534 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 535 536 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 537 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 538 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 539 } else { 540 vec_y = _mm512_loadu_pd(&y[8*i]); 541 } 542 vec_y2 = _mm512_setzero_pd(); 543 vec_y3 = _mm512_setzero_pd(); 544 vec_y4 = _mm512_setzero_pd(); 545 546 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 547 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 548 case 3: 549 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 550 acolidx += 8; aval += 8; 551 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 552 acolidx += 8; aval += 8; 553 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 554 acolidx += 8; aval += 8; 555 j += 3; 556 break; 557 case 2: 558 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 559 acolidx += 8; aval += 8; 560 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 561 acolidx += 8; aval += 8; 562 j += 2; 563 break; 564 case 1: 565 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 566 acolidx += 8; aval += 8; 567 j += 1; 568 break; 569 } 570 #pragma novector 571 for (; j<(a->sliidx[i+1]>>3); j+=4) { 572 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 573 acolidx += 8; aval += 8; 574 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 575 acolidx += 8; aval += 8; 576 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 577 acolidx += 8; aval += 8; 578 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 579 acolidx += 8; aval += 8; 580 } 581 582 vec_y = _mm512_add_pd(vec_y,vec_y2); 583 vec_y = _mm512_add_pd(vec_y,vec_y3); 584 vec_y = _mm512_add_pd(vec_y,vec_y4); 585 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 586 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 587 } else { 588 _mm512_storeu_pd(&z[8*i],vec_y); 589 } 590 } 591 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 592 for (i=0; i<totalslices; i++) { /* loop over full slices */ 593 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 594 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 595 596 /* last slice may have padding rows. Don't use vectorization. */ 597 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 598 for (r=0; r<(A->rmap->n & 0x07); ++r) { 599 row = 8*i + r; 600 yval = (MatScalar)0.0; 601 nnz_in_row = a->rlen[row]; 602 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 603 z[row] = y[row] + yval; 604 } 605 break; 606 } 607 608 vec_y = _mm256_loadu_pd(y+8*i); 609 vec_y2 = _mm256_loadu_pd(y+8*i+4); 610 611 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 612 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 613 vec_vals = _mm256_loadu_pd(aval); 614 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 615 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 616 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 617 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 618 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 619 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 620 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 621 aval += 4; 622 623 vec_vals = _mm256_loadu_pd(aval); 624 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 625 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 626 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 627 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 628 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 629 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 630 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 631 aval += 4; 632 } 633 634 _mm256_storeu_pd(z+i*8,vec_y); 635 _mm256_storeu_pd(z+i*8+4,vec_y2); 636 } 637 #else 638 for (i=0; i<totalslices; i++) { /* loop over slices */ 639 for (j=0; j<8; j++) sum[j] = 0.0; 640 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 641 sum[0] += aval[j] * x[acolidx[j]]; 642 sum[1] += aval[j+1] * x[acolidx[j+1]]; 643 sum[2] += aval[j+2] * x[acolidx[j+2]]; 644 sum[3] += aval[j+3] * x[acolidx[j+3]]; 645 sum[4] += aval[j+4] * x[acolidx[j+4]]; 646 sum[5] += aval[j+5] * x[acolidx[j+5]]; 647 sum[6] += aval[j+6] * x[acolidx[j+6]]; 648 sum[7] += aval[j+7] * x[acolidx[j+7]]; 649 } 650 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 651 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 652 } else { 653 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 654 } 655 } 656 #endif 657 658 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 659 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 660 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 665 { 666 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 667 PetscScalar *y; 668 const PetscScalar *x; 669 const MatScalar *aval=a->val; 670 const PetscInt *acolidx=a->colidx; 671 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 672 PetscErrorCode ierr; 673 674 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 675 #pragma disjoint(*x,*y,*aval) 676 #endif 677 678 PetscFunctionBegin; 679 if (A->symmetric) { 680 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 684 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 685 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 686 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 687 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 688 for (r=0; r<(A->rmap->n & 0x07); ++r) { 689 row = 8*i + r; 690 nnz_in_row = a->rlen[row]; 691 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 692 } 693 break; 694 } 695 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 696 y[acolidx[j]] += aval[j] * x[8*i]; 697 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 698 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 699 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 700 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 701 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 702 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 703 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 704 } 705 } 706 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 707 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 708 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 713 { 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 if (A->symmetric) { 718 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 719 } else { 720 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 721 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 722 } 723 PetscFunctionReturn(0); 724 } 725 726 /* 727 Checks for missing diagonals 728 */ 729 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 730 { 731 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 732 PetscInt *diag,i; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 *missing = PETSC_FALSE; 737 if (A->rmap->n > 0 && !(a->colidx)) { 738 *missing = PETSC_TRUE; 739 if (d) *d = 0; 740 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 741 } else { 742 diag = a->diag; 743 for (i=0; i<A->rmap->n; i++) { 744 if (diag[i] == -1) { 745 *missing = PETSC_TRUE; 746 if (d) *d = i; 747 ierr = PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);CHKERRQ(ierr); 748 break; 749 } 750 } 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 756 { 757 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 758 PetscInt i,j,m=A->rmap->n,shift; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 if (!a->diag) { 763 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 764 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 765 a->free_diag = PETSC_TRUE; 766 } 767 for (i=0; i<m; i++) { /* loop over rows */ 768 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 769 a->diag[i] = -1; 770 for (j=0; j<a->rlen[i]; j++) { 771 if (a->colidx[shift+j*8] == i) { 772 a->diag[i] = shift+j*8; 773 break; 774 } 775 } 776 } 777 PetscFunctionReturn(0); 778 } 779 780 /* 781 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 782 */ 783 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 784 { 785 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 786 PetscInt i,*diag,m = A->rmap->n; 787 MatScalar *val = a->val; 788 PetscScalar *idiag,*mdiag; 789 PetscErrorCode ierr; 790 791 PetscFunctionBegin; 792 if (a->idiagvalid) PetscFunctionReturn(0); 793 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 794 diag = a->diag; 795 if (!a->idiag) { 796 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 797 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 798 val = a->val; 799 } 800 mdiag = a->mdiag; 801 idiag = a->idiag; 802 803 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 804 for (i=0; i<m; i++) { 805 mdiag[i] = val[diag[i]]; 806 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 807 if (PetscRealPart(fshift)) { 808 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 809 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 810 A->factorerror_zeropivot_value = 0.0; 811 A->factorerror_zeropivot_row = i; 812 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 813 } 814 idiag[i] = 1.0/val[diag[i]]; 815 } 816 ierr = PetscLogFlops(m);CHKERRQ(ierr); 817 } else { 818 for (i=0; i<m; i++) { 819 mdiag[i] = val[diag[i]]; 820 idiag[i] = omega/(fshift + val[diag[i]]); 821 } 822 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 823 } 824 a->idiagvalid = PETSC_TRUE; 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 829 { 830 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 ierr = PetscArrayzero(a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 835 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 836 PetscFunctionReturn(0); 837 } 838 839 PetscErrorCode MatDestroy_SeqSELL(Mat A) 840 { 841 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 842 PetscErrorCode ierr; 843 844 PetscFunctionBegin; 845 #if defined(PETSC_USE_LOG) 846 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 847 #endif 848 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 849 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 850 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 851 ierr = PetscFree(a->diag);CHKERRQ(ierr); 852 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 853 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 854 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 855 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 856 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 857 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 858 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 859 860 ierr = PetscFree(A->data);CHKERRQ(ierr); 861 862 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 865 #if defined(PETSC_HAVE_ELEMENTAL) 866 #endif 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 872 { 873 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 switch (op) { 878 case MAT_ROW_ORIENTED: 879 a->roworiented = flg; 880 break; 881 case MAT_KEEP_NONZERO_PATTERN: 882 a->keepnonzeropattern = flg; 883 break; 884 case MAT_NEW_NONZERO_LOCATIONS: 885 a->nonew = (flg ? 0 : 1); 886 break; 887 case MAT_NEW_NONZERO_LOCATION_ERR: 888 a->nonew = (flg ? -1 : 0); 889 break; 890 case MAT_NEW_NONZERO_ALLOCATION_ERR: 891 a->nonew = (flg ? -2 : 0); 892 break; 893 case MAT_UNUSED_NONZERO_LOCATION_ERR: 894 a->nounused = (flg ? -1 : 0); 895 break; 896 case MAT_NEW_DIAGONALS: 897 case MAT_IGNORE_OFF_PROC_ENTRIES: 898 case MAT_USE_HASH_TABLE: 899 case MAT_SORTED_FULL: 900 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 901 break; 902 case MAT_SPD: 903 case MAT_SYMMETRIC: 904 case MAT_STRUCTURALLY_SYMMETRIC: 905 case MAT_HERMITIAN: 906 case MAT_SYMMETRY_ETERNAL: 907 /* These options are handled directly by MatSetOption() */ 908 break; 909 default: 910 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 916 { 917 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 918 PetscInt i,j,n,shift; 919 PetscScalar *x,zero=0.0; 920 PetscErrorCode ierr; 921 922 PetscFunctionBegin; 923 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 924 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 925 926 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 927 PetscInt *diag=a->diag; 928 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 929 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 930 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 934 ierr = VecSet(v,zero);CHKERRQ(ierr); 935 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 936 for (i=0; i<n; i++) { /* loop over rows */ 937 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 938 x[i] = 0; 939 for (j=0; j<a->rlen[i]; j++) { 940 if (a->colidx[shift+j*8] == i) { 941 x[i] = a->val[shift+j*8]; 942 break; 943 } 944 } 945 } 946 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 947 PetscFunctionReturn(0); 948 } 949 950 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 951 { 952 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 953 const PetscScalar *l,*r; 954 PetscInt i,j,m,n,row; 955 PetscErrorCode ierr; 956 957 PetscFunctionBegin; 958 if (ll) { 959 /* The local size is used so that VecMPI can be passed to this routine 960 by MatDiagonalScale_MPISELL */ 961 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 962 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 963 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 964 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 965 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 966 a->val[j] *= l[8*i+row]; 967 } 968 } 969 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 970 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 971 } 972 if (rr) { 973 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 974 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 975 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 976 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 977 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 978 a->val[j] *= r[a->colidx[j]]; 979 } 980 } 981 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 982 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 983 } 984 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } 987 988 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 989 990 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 991 { 992 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 993 PetscInt *cp,i,k,low,high,t,row,col,l; 994 PetscInt shift; 995 MatScalar *vp; 996 997 PetscFunctionBegin; 998 for (k=0; k<m; k++) { /* loop over requested rows */ 999 row = im[k]; 1000 if (row<0) continue; 1001 #if defined(PETSC_USE_DEBUG) 1002 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1003 #endif 1004 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1005 cp = a->colidx+shift; /* pointer to the row */ 1006 vp = a->val+shift; /* pointer to the row */ 1007 for (l=0; l<n; l++) { /* loop over requested columns */ 1008 col = in[l]; 1009 if (col<0) continue; 1010 #if defined(PETSC_USE_DEBUG) 1011 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1012 #endif 1013 high = a->rlen[row]; low = 0; /* assume unsorted */ 1014 while (high-low > 5) { 1015 t = (low+high)/2; 1016 if (*(cp+t*8) > col) high = t; 1017 else low = t; 1018 } 1019 for (i=low; i<high; i++) { 1020 if (*(cp+8*i) > col) break; 1021 if (*(cp+8*i) == col) { 1022 *v++ = *(vp+8*i); 1023 goto finished; 1024 } 1025 } 1026 *v++ = 0.0; 1027 finished:; 1028 } 1029 } 1030 PetscFunctionReturn(0); 1031 } 1032 1033 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1034 { 1035 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1036 PetscInt i,j,m=A->rmap->n,shift; 1037 const char *name; 1038 PetscViewerFormat format; 1039 PetscErrorCode ierr; 1040 1041 PetscFunctionBegin; 1042 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1043 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1044 PetscInt nofinalvalue = 0; 1045 /* 1046 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1047 nofinalvalue = 1; 1048 } 1049 */ 1050 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1051 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1052 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1053 #if defined(PETSC_USE_COMPLEX) 1054 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1055 #else 1056 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1057 #endif 1058 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1059 1060 for (i=0; i<m; i++) { 1061 shift = a->sliidx[i>>3]+(i&0x07); 1062 for (j=0; j<a->rlen[i]; j++) { 1063 #if defined(PETSC_USE_COMPLEX) 1064 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1065 #else 1066 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1067 #endif 1068 } 1069 } 1070 /* 1071 if (nofinalvalue) { 1072 #if defined(PETSC_USE_COMPLEX) 1073 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1074 #else 1075 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1076 #endif 1077 } 1078 */ 1079 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1082 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1083 PetscFunctionReturn(0); 1084 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1085 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1086 for (i=0; i<m; i++) { 1087 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1088 shift = a->sliidx[i>>3]+(i&0x07); 1089 for (j=0; j<a->rlen[i]; j++) { 1090 #if defined(PETSC_USE_COMPLEX) 1091 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1092 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1093 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1094 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1095 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1096 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1097 } 1098 #else 1099 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1100 #endif 1101 } 1102 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1103 } 1104 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1105 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1106 PetscInt cnt=0,jcnt; 1107 PetscScalar value; 1108 #if defined(PETSC_USE_COMPLEX) 1109 PetscBool realonly=PETSC_TRUE; 1110 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1111 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1112 realonly = PETSC_FALSE; 1113 break; 1114 } 1115 } 1116 #endif 1117 1118 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1119 for (i=0; i<m; i++) { 1120 jcnt = 0; 1121 shift = a->sliidx[i>>3]+(i&0x07); 1122 for (j=0; j<A->cmap->n; j++) { 1123 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1124 value = a->val[cnt++]; 1125 jcnt++; 1126 } else { 1127 value = 0.0; 1128 } 1129 #if defined(PETSC_USE_COMPLEX) 1130 if (realonly) { 1131 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1132 } else { 1133 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1134 } 1135 #else 1136 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1137 #endif 1138 } 1139 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1140 } 1141 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1142 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1143 PetscInt fshift=1; 1144 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1145 #if defined(PETSC_USE_COMPLEX) 1146 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1147 #else 1148 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1149 #endif 1150 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1151 for (i=0; i<m; i++) { 1152 shift = a->sliidx[i>>3]+(i&0x07); 1153 for (j=0; j<a->rlen[i]; j++) { 1154 #if defined(PETSC_USE_COMPLEX) 1155 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1156 #else 1157 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1158 #endif 1159 } 1160 } 1161 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1162 } else if (format == PETSC_VIEWER_NATIVE) { 1163 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1164 PetscInt row; 1165 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1166 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1167 #if defined(PETSC_USE_COMPLEX) 1168 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1169 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1170 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1171 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1172 } else { 1173 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1174 } 1175 #else 1176 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1177 #endif 1178 } 1179 } 1180 } else { 1181 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1182 if (A->factortype) { 1183 for (i=0; i<m; i++) { 1184 shift = a->sliidx[i>>3]+(i&0x07); 1185 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1186 /* L part */ 1187 for (j=shift; j<a->diag[i]; j+=8) { 1188 #if defined(PETSC_USE_COMPLEX) 1189 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1190 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1191 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1192 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1193 } else { 1194 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1195 } 1196 #else 1197 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1198 #endif 1199 } 1200 /* diagonal */ 1201 j = a->diag[i]; 1202 #if defined(PETSC_USE_COMPLEX) 1203 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1204 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1205 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1206 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1207 } else { 1208 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1209 } 1210 #else 1211 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1212 #endif 1213 1214 /* U part */ 1215 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1216 #if defined(PETSC_USE_COMPLEX) 1217 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1218 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1219 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1220 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1221 } else { 1222 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1223 } 1224 #else 1225 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1226 #endif 1227 } 1228 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1229 } 1230 } else { 1231 for (i=0; i<m; i++) { 1232 shift = a->sliidx[i>>3]+(i&0x07); 1233 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1234 for (j=0; j<a->rlen[i]; j++) { 1235 #if defined(PETSC_USE_COMPLEX) 1236 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1237 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1238 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1239 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1240 } else { 1241 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1242 } 1243 #else 1244 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1245 #endif 1246 } 1247 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1248 } 1249 } 1250 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1251 } 1252 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #include <petscdraw.h> 1257 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1258 { 1259 Mat A=(Mat)Aa; 1260 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1261 PetscInt i,j,m=A->rmap->n,shift; 1262 int color; 1263 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1264 PetscViewer viewer; 1265 PetscViewerFormat format; 1266 PetscErrorCode ierr; 1267 1268 PetscFunctionBegin; 1269 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1270 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1271 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1272 1273 /* loop over matrix elements drawing boxes */ 1274 1275 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1276 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1277 /* Blue for negative, Cyan for zero and Red for positive */ 1278 color = PETSC_DRAW_BLUE; 1279 for (i=0; i<m; i++) { 1280 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1281 y_l = m - i - 1.0; y_r = y_l + 1.0; 1282 for (j=0; j<a->rlen[i]; j++) { 1283 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1284 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1285 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1286 } 1287 } 1288 color = PETSC_DRAW_CYAN; 1289 for (i=0; i<m; i++) { 1290 shift = a->sliidx[i>>3]+(i&0x07); 1291 y_l = m - i - 1.0; y_r = y_l + 1.0; 1292 for (j=0; j<a->rlen[i]; j++) { 1293 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1294 if (a->val[shift+8*j] != 0.) continue; 1295 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1296 } 1297 } 1298 color = PETSC_DRAW_RED; 1299 for (i=0; i<m; i++) { 1300 shift = a->sliidx[i>>3]+(i&0x07); 1301 y_l = m - i - 1.0; y_r = y_l + 1.0; 1302 for (j=0; j<a->rlen[i]; j++) { 1303 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1304 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1305 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1306 } 1307 } 1308 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1309 } else { 1310 /* use contour shading to indicate magnitude of values */ 1311 /* first determine max of all nonzero values */ 1312 PetscReal minv=0.0,maxv=0.0; 1313 PetscInt count=0; 1314 PetscDraw popup; 1315 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1316 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1317 } 1318 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1319 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1320 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1321 1322 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1323 for (i=0; i<m; i++) { 1324 shift = a->sliidx[i>>3]+(i&0x07); 1325 y_l = m - i - 1.0; 1326 y_r = y_l + 1.0; 1327 for (j=0; j<a->rlen[i]; j++) { 1328 x_l = a->colidx[shift+j*8]; 1329 x_r = x_l + 1.0; 1330 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1331 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1332 count++; 1333 } 1334 } 1335 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1336 } 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #include <petscdraw.h> 1341 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1342 { 1343 PetscDraw draw; 1344 PetscReal xr,yr,xl,yl,h,w; 1345 PetscBool isnull; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1350 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1351 if (isnull) PetscFunctionReturn(0); 1352 1353 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1354 xr += w; yr += h; xl = -w; yl = -h; 1355 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1356 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1357 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1358 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1359 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1364 { 1365 PetscBool iascii,isbinary,isdraw; 1366 PetscErrorCode ierr; 1367 1368 PetscFunctionBegin; 1369 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1370 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1371 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1372 if (iascii) { 1373 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1374 } else if (isbinary) { 1375 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1376 } else if (isdraw) { 1377 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1383 { 1384 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1385 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1386 MatScalar *vp; 1387 PetscErrorCode ierr; 1388 1389 PetscFunctionBegin; 1390 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1391 /* To do: compress out the unused elements */ 1392 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1393 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1394 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1395 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1396 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1397 for (i=0; i<a->totalslices; ++i) { 1398 shift = a->sliidx[i]; /* starting index of the slice */ 1399 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1400 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1401 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1402 row = 8*i + row_in_slice; 1403 nrow = a->rlen[row]; /* number of nonzeros in row */ 1404 /* 1405 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1406 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1407 */ 1408 lastcol = 0; 1409 if (nrow>0) { /* nonempty row */ 1410 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1411 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1412 for (j=1;j<8;j++) { 1413 if (a->rlen[8*i+j]) { 1414 lastcol = cp[j]; 1415 break; 1416 } 1417 } 1418 } else { 1419 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1420 } 1421 1422 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1423 cp[8*k+row_in_slice] = lastcol; 1424 vp[8*k+row_in_slice] = (MatScalar)0; 1425 } 1426 } 1427 } 1428 1429 A->info.mallocs += a->reallocs; 1430 a->reallocs = 0; 1431 1432 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1437 { 1438 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1439 1440 PetscFunctionBegin; 1441 info->block_size = 1.0; 1442 info->nz_allocated = (double)a->maxallocmat; 1443 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1444 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1445 info->assemblies = (double)A->num_ass; 1446 info->mallocs = (double)A->info.mallocs; 1447 info->memory = ((PetscObject)A)->mem; 1448 if (A->factortype) { 1449 info->fill_ratio_given = A->info.fill_ratio_given; 1450 info->fill_ratio_needed = A->info.fill_ratio_needed; 1451 info->factor_mallocs = A->info.factor_mallocs; 1452 } else { 1453 info->fill_ratio_given = 0; 1454 info->fill_ratio_needed = 0; 1455 info->factor_mallocs = 0; 1456 } 1457 PetscFunctionReturn(0); 1458 } 1459 1460 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1461 { 1462 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1463 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1464 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1465 MatScalar *vp,value; 1466 PetscErrorCode ierr; 1467 1468 PetscFunctionBegin; 1469 for (k=0; k<m; k++) { /* loop over added rows */ 1470 row = im[k]; 1471 if (row < 0) continue; 1472 #if defined(PETSC_USE_DEBUG) 1473 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1474 #endif 1475 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1476 cp = a->colidx+shift; /* pointer to the row */ 1477 vp = a->val+shift; /* pointer to the row */ 1478 nrow = a->rlen[row]; 1479 low = 0; 1480 high = nrow; 1481 1482 for (l=0; l<n; l++) { /* loop over added columns */ 1483 col = in[l]; 1484 if (col<0) continue; 1485 #if defined(PETSC_USE_DEBUG) 1486 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1487 #endif 1488 if (a->roworiented) { 1489 value = v[l+k*n]; 1490 } else { 1491 value = v[k+l*m]; 1492 } 1493 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1494 1495 /* search in this row for the specified colmun, i indicates the column to be set */ 1496 if (col <= lastcol) low = 0; 1497 else high = nrow; 1498 lastcol = col; 1499 while (high-low > 5) { 1500 t = (low+high)/2; 1501 if (*(cp+t*8) > col) high = t; 1502 else low = t; 1503 } 1504 for (i=low; i<high; i++) { 1505 if (*(cp+i*8) > col) break; 1506 if (*(cp+i*8) == col) { 1507 if (is == ADD_VALUES) *(vp+i*8) += value; 1508 else *(vp+i*8) = value; 1509 low = i + 1; 1510 goto noinsert; 1511 } 1512 } 1513 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1514 if (nonew == 1) goto noinsert; 1515 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1516 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1517 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1518 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1519 for (ii=nrow-1; ii>=i; ii--) { 1520 *(cp+(ii+1)*8) = *(cp+ii*8); 1521 *(vp+(ii+1)*8) = *(vp+ii*8); 1522 } 1523 a->rlen[row]++; 1524 *(cp+i*8) = col; 1525 *(vp+i*8) = value; 1526 a->nz++; 1527 A->nonzerostate++; 1528 low = i+1; high++; nrow++; 1529 noinsert:; 1530 } 1531 a->rlen[row] = nrow; 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1537 { 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 /* If the two matrices have the same copy implementation, use fast copy. */ 1542 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1543 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1544 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1545 1546 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1547 ierr = PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1550 } 1551 PetscFunctionReturn(0); 1552 } 1553 1554 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1555 { 1556 PetscErrorCode ierr; 1557 1558 PetscFunctionBegin; 1559 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1564 { 1565 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1566 1567 PetscFunctionBegin; 1568 *array = a->val; 1569 PetscFunctionReturn(0); 1570 } 1571 1572 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1573 { 1574 PetscFunctionBegin; 1575 PetscFunctionReturn(0); 1576 } 1577 1578 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1579 { 1580 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1581 PetscInt i; 1582 MatScalar *aval=a->val; 1583 1584 PetscFunctionBegin; 1585 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1590 { 1591 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1592 PetscInt i; 1593 MatScalar *aval=a->val; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1598 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1603 { 1604 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1605 MatScalar *aval=a->val; 1606 PetscScalar oalpha=alpha; 1607 PetscBLASInt one=1,size; 1608 PetscErrorCode ierr; 1609 1610 PetscFunctionBegin; 1611 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1612 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1613 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1614 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1619 { 1620 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1621 PetscErrorCode ierr; 1622 1623 PetscFunctionBegin; 1624 if (!Y->preallocated || !y->nz) { 1625 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1626 } 1627 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1632 { 1633 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1634 PetscScalar *x,sum,*t; 1635 const MatScalar *idiag=0,*mdiag; 1636 const PetscScalar *b,*xb; 1637 PetscInt n,m=A->rmap->n,i,j,shift; 1638 const PetscInt *diag; 1639 PetscErrorCode ierr; 1640 1641 PetscFunctionBegin; 1642 its = its*lits; 1643 1644 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1645 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1646 a->fshift = fshift; 1647 a->omega = omega; 1648 1649 diag = a->diag; 1650 t = a->ssor_work; 1651 idiag = a->idiag; 1652 mdiag = a->mdiag; 1653 1654 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1655 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1656 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1657 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1658 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1659 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1660 1661 if (flag & SOR_ZERO_INITIAL_GUESS) { 1662 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1663 for (i=0; i<m; i++) { 1664 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1665 sum = b[i]; 1666 n = (diag[i]-shift)/8; 1667 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1668 t[i] = sum; 1669 x[i] = sum*idiag[i]; 1670 } 1671 xb = t; 1672 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1673 } else xb = b; 1674 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1675 for (i=m-1; i>=0; i--) { 1676 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1677 sum = xb[i]; 1678 n = a->rlen[i]-(diag[i]-shift)/8-1; 1679 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1680 if (xb == b) { 1681 x[i] = sum*idiag[i]; 1682 } else { 1683 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1684 } 1685 } 1686 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1687 } 1688 its--; 1689 } 1690 while (its--) { 1691 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1692 for (i=0; i<m; i++) { 1693 /* lower */ 1694 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1695 sum = b[i]; 1696 n = (diag[i]-shift)/8; 1697 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1698 t[i] = sum; /* save application of the lower-triangular part */ 1699 /* upper */ 1700 n = a->rlen[i]-(diag[i]-shift)/8-1; 1701 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1702 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1703 } 1704 xb = t; 1705 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1706 } else xb = b; 1707 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1708 for (i=m-1; i>=0; i--) { 1709 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1710 sum = xb[i]; 1711 if (xb == b) { 1712 /* whole matrix (no checkpointing available) */ 1713 n = a->rlen[i]; 1714 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1715 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1716 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1717 n = a->rlen[i]-(diag[i]-shift)/8-1; 1718 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1719 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1720 } 1721 } 1722 if (xb == b) { 1723 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1724 } else { 1725 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1726 } 1727 } 1728 } 1729 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1730 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 1734 /* -------------------------------------------------------------------*/ 1735 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1736 MatGetRow_SeqSELL, 1737 MatRestoreRow_SeqSELL, 1738 MatMult_SeqSELL, 1739 /* 4*/ MatMultAdd_SeqSELL, 1740 MatMultTranspose_SeqSELL, 1741 MatMultTransposeAdd_SeqSELL, 1742 0, 1743 0, 1744 0, 1745 /* 10*/ 0, 1746 0, 1747 0, 1748 MatSOR_SeqSELL, 1749 0, 1750 /* 15*/ MatGetInfo_SeqSELL, 1751 MatEqual_SeqSELL, 1752 MatGetDiagonal_SeqSELL, 1753 MatDiagonalScale_SeqSELL, 1754 0, 1755 /* 20*/ 0, 1756 MatAssemblyEnd_SeqSELL, 1757 MatSetOption_SeqSELL, 1758 MatZeroEntries_SeqSELL, 1759 /* 24*/ 0, 1760 0, 1761 0, 1762 0, 1763 0, 1764 /* 29*/ MatSetUp_SeqSELL, 1765 0, 1766 0, 1767 0, 1768 0, 1769 /* 34*/ MatDuplicate_SeqSELL, 1770 0, 1771 0, 1772 0, 1773 0, 1774 /* 39*/ 0, 1775 0, 1776 0, 1777 MatGetValues_SeqSELL, 1778 MatCopy_SeqSELL, 1779 /* 44*/ 0, 1780 MatScale_SeqSELL, 1781 MatShift_SeqSELL, 1782 0, 1783 0, 1784 /* 49*/ 0, 1785 0, 1786 0, 1787 0, 1788 0, 1789 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1790 0, 1791 0, 1792 0, 1793 0, 1794 /* 59*/ 0, 1795 MatDestroy_SeqSELL, 1796 MatView_SeqSELL, 1797 0, 1798 0, 1799 /* 64*/ 0, 1800 0, 1801 0, 1802 0, 1803 0, 1804 /* 69*/ 0, 1805 0, 1806 0, 1807 0, 1808 0, 1809 /* 74*/ 0, 1810 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1811 0, 1812 0, 1813 0, 1814 /* 79*/ 0, 1815 0, 1816 0, 1817 0, 1818 0, 1819 /* 84*/ 0, 1820 0, 1821 0, 1822 0, 1823 0, 1824 /* 89*/ 0, 1825 0, 1826 0, 1827 0, 1828 0, 1829 /* 94*/ 0, 1830 0, 1831 0, 1832 0, 1833 0, 1834 /* 99*/ 0, 1835 0, 1836 0, 1837 MatConjugate_SeqSELL, 1838 0, 1839 /*104*/ 0, 1840 0, 1841 0, 1842 0, 1843 0, 1844 /*109*/ 0, 1845 0, 1846 0, 1847 0, 1848 MatMissingDiagonal_SeqSELL, 1849 /*114*/ 0, 1850 0, 1851 0, 1852 0, 1853 0, 1854 /*119*/ 0, 1855 0, 1856 0, 1857 0, 1858 0, 1859 /*124*/ 0, 1860 0, 1861 0, 1862 0, 1863 0, 1864 /*129*/ 0, 1865 0, 1866 0, 1867 0, 1868 0, 1869 /*134*/ 0, 1870 0, 1871 0, 1872 0, 1873 0, 1874 /*139*/ 0, 1875 0, 1876 0, 1877 MatFDColoringSetUp_SeqXAIJ, 1878 0, 1879 /*144*/0 1880 }; 1881 1882 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1883 { 1884 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1889 1890 /* allocate space for values if not already there */ 1891 if (!a->saved_values) { 1892 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1893 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1894 } 1895 1896 /* copy values over */ 1897 ierr = PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1898 PetscFunctionReturn(0); 1899 } 1900 1901 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1902 { 1903 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1908 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1909 ierr = PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 /*@C 1914 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1915 1916 Not Collective 1917 1918 Input Parameters: 1919 . mat - a MATSEQSELL matrix 1920 . array - pointer to the data 1921 1922 Level: intermediate 1923 1924 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1925 @*/ 1926 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1927 { 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1936 { 1937 Mat_SeqSELL *b; 1938 PetscMPIInt size; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1943 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1944 1945 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1946 1947 B->data = (void*)b; 1948 1949 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1950 1951 b->row = 0; 1952 b->col = 0; 1953 b->icol = 0; 1954 b->reallocs = 0; 1955 b->ignorezeroentries = PETSC_FALSE; 1956 b->roworiented = PETSC_TRUE; 1957 b->nonew = 0; 1958 b->diag = 0; 1959 b->solve_work = 0; 1960 B->spptr = 0; 1961 b->saved_values = 0; 1962 b->idiag = 0; 1963 b->mdiag = 0; 1964 b->ssor_work = 0; 1965 b->omega = 1.0; 1966 b->fshift = 0.0; 1967 b->idiagvalid = PETSC_FALSE; 1968 b->keepnonzeropattern = PETSC_FALSE; 1969 1970 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1971 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1972 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1973 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* 1981 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1982 */ 1983 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1984 { 1985 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1986 PetscInt i,m=A->rmap->n; 1987 PetscInt totalslices=a->totalslices; 1988 PetscErrorCode ierr; 1989 1990 PetscFunctionBegin; 1991 c = (Mat_SeqSELL*)C->data; 1992 1993 C->factortype = A->factortype; 1994 c->row = 0; 1995 c->col = 0; 1996 c->icol = 0; 1997 c->reallocs = 0; 1998 1999 C->assembled = PETSC_TRUE; 2000 2001 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2002 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2003 2004 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2005 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2006 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2007 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2008 2009 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2010 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2011 2012 /* allocate the matrix space */ 2013 if (mallocmatspace) { 2014 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2015 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2016 2017 c->singlemalloc = PETSC_TRUE; 2018 2019 if (m > 0) { 2020 ierr = PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);CHKERRQ(ierr); 2021 if (cpvalues == MAT_COPY_VALUES) { 2022 ierr = PetscArraycpy(c->val,a->val,a->maxallocmat);CHKERRQ(ierr); 2023 } else { 2024 ierr = PetscArrayzero(c->val,a->maxallocmat);CHKERRQ(ierr); 2025 } 2026 } 2027 } 2028 2029 c->ignorezeroentries = a->ignorezeroentries; 2030 c->roworiented = a->roworiented; 2031 c->nonew = a->nonew; 2032 if (a->diag) { 2033 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2034 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2035 for (i=0; i<m; i++) { 2036 c->diag[i] = a->diag[i]; 2037 } 2038 } else c->diag = 0; 2039 2040 c->solve_work = 0; 2041 c->saved_values = 0; 2042 c->idiag = 0; 2043 c->ssor_work = 0; 2044 c->keepnonzeropattern = a->keepnonzeropattern; 2045 c->free_val = PETSC_TRUE; 2046 c->free_colidx = PETSC_TRUE; 2047 2048 c->maxallocmat = a->maxallocmat; 2049 c->maxallocrow = a->maxallocrow; 2050 c->rlenmax = a->rlenmax; 2051 c->nz = a->nz; 2052 C->preallocated = PETSC_TRUE; 2053 2054 c->nonzerorowcnt = a->nonzerorowcnt; 2055 C->nonzerostate = A->nonzerostate; 2056 2057 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2062 { 2063 PetscErrorCode ierr; 2064 2065 PetscFunctionBegin; 2066 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2067 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2068 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2069 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2070 } 2071 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2072 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 /*@C 2077 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2078 2079 Collective 2080 2081 Input Parameters: 2082 + comm - MPI communicator, set to PETSC_COMM_SELF 2083 . m - number of rows 2084 . n - number of columns 2085 . rlenmax - maximum number of nonzeros in a row 2086 - rlen - array containing the number of nonzeros in the various rows 2087 (possibly different for each row) or NULL 2088 2089 Output Parameter: 2090 . A - the matrix 2091 2092 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2093 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2094 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2095 2096 Notes: 2097 If nnz is given then nz is ignored 2098 2099 Specify the preallocated storage with either rlenmax or rlen (not both). 2100 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2101 allocation. For large problems you MUST preallocate memory or you 2102 will get TERRIBLE performance, see the users' manual chapter on matrices. 2103 2104 Level: intermediate 2105 2106 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2107 2108 @*/ 2109 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2110 { 2111 PetscErrorCode ierr; 2112 2113 PetscFunctionBegin; 2114 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2115 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2116 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2117 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2122 { 2123 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2124 PetscInt totalslices=a->totalslices; 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 /* If the matrix dimensions are not equal,or no of nonzeros */ 2129 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2130 *flg = PETSC_FALSE; 2131 PetscFunctionReturn(0); 2132 } 2133 /* if the a->colidx are the same */ 2134 ierr = PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);CHKERRQ(ierr); 2135 if (!*flg) PetscFunctionReturn(0); 2136 /* if a->val are the same */ 2137 ierr = PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2142 { 2143 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2144 2145 PetscFunctionBegin; 2146 a->idiagvalid = PETSC_FALSE; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2151 { 2152 #if defined(PETSC_USE_COMPLEX) 2153 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2154 PetscInt i; 2155 PetscScalar *val = a->val; 2156 2157 PetscFunctionBegin; 2158 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2159 val[i] = PetscConj(val[i]); 2160 } 2161 #else 2162 PetscFunctionBegin; 2163 #endif 2164 PetscFunctionReturn(0); 2165 } 2166