1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 46 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 47 if (allowzeropivot) { 48 A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 49 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 50 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i); 51 } 52 53 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 54 diag += 1; 55 mdiag += 1; 56 } 57 break; 58 case 2: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 4*diag_offset[i]; 61 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 62 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 63 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 64 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 65 diag += 4; 66 mdiag += 4; 67 } 68 break; 69 case 3: 70 for (i=0; i<mbs; i++) { 71 odiag = v + 9*diag_offset[i]; 72 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 73 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 74 diag[8] = odiag[8]; 75 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 76 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 77 mdiag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 mdiag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 88 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 mdiag += 16; 93 } 94 break; 95 case 5: 96 for (i=0; i<mbs; i++) { 97 odiag = v + 25*diag_offset[i]; 98 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 99 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 100 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 101 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 102 diag += 25; 103 mdiag += 25; 104 } 105 break; 106 case 6: 107 for (i=0; i<mbs; i++) { 108 odiag = v + 36*diag_offset[i]; 109 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 110 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 111 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 112 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 113 diag += 36; 114 mdiag += 36; 115 } 116 break; 117 case 7: 118 for (i=0; i<mbs; i++) { 119 odiag = v + 49*diag_offset[i]; 120 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 121 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 123 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 124 diag += 49; 125 mdiag += 49; 126 } 127 break; 128 default: 129 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 130 for (i=0; i<mbs; i++) { 131 odiag = v + bs2*diag_offset[i]; 132 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 133 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 134 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 135 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136 diag += bs2; 137 mdiag += bs2; 138 } 139 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 140 } 141 a->idiagvalid = PETSC_TRUE; 142 PetscFunctionReturn(0); 143 } 144 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatSOR_SeqBAIJ" 147 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 148 { 149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 150 PetscScalar *x,*work,*w,*workt,*t; 151 const MatScalar *v,*aa = a->a, *idiag; 152 const PetscScalar *b,*xb; 153 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 154 PetscErrorCode ierr; 155 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 156 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 157 158 PetscFunctionBegin; 159 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 160 its = its*lits; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 163 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 164 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 165 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 166 167 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 168 169 if (!m) PetscFunctionReturn(0); 170 diag = a->diag; 171 idiag = a->idiag; 172 k = PetscMax(A->rmap->n,A->cmap->n); 173 if (!a->mult_work) { 174 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 175 } 176 if (!a->sor_workt) { 177 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 178 } 179 if (!a->sor_work) { 180 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 181 } 182 work = a->mult_work; 183 t = a->sor_workt; 184 w = a->sor_work; 185 186 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 187 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 188 189 if (flag & SOR_ZERO_INITIAL_GUESS) { 190 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 191 switch (bs) { 192 case 1: 193 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 194 t[0] = b[0]; 195 i2 = 1; 196 idiag += 1; 197 for (i=1; i<m; i++) { 198 v = aa + ai[i]; 199 vi = aj + ai[i]; 200 nz = diag[i] - ai[i]; 201 s[0] = b[i2]; 202 for (j=0; j<nz; j++) { 203 xw[0] = x[vi[j]]; 204 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 205 } 206 t[i2] = s[0]; 207 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 208 x[i2] = xw[0]; 209 idiag += 1; 210 i2 += 1; 211 } 212 break; 213 case 2: 214 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 215 t[0] = b[0]; t[1] = b[1]; 216 i2 = 2; 217 idiag += 4; 218 for (i=1; i<m; i++) { 219 v = aa + 4*ai[i]; 220 vi = aj + ai[i]; 221 nz = diag[i] - ai[i]; 222 s[0] = b[i2]; s[1] = b[i2+1]; 223 for (j=0; j<nz; j++) { 224 idx = 2*vi[j]; 225 it = 4*j; 226 xw[0] = x[idx]; xw[1] = x[1+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 228 } 229 t[i2] = s[0]; t[i2+1] = s[1]; 230 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 231 x[i2] = xw[0]; x[i2+1] = xw[1]; 232 idiag += 4; 233 i2 += 2; 234 } 235 break; 236 case 3: 237 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 238 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 239 i2 = 3; 240 idiag += 9; 241 for (i=1; i<m; i++) { 242 v = aa + 9*ai[i]; 243 vi = aj + ai[i]; 244 nz = diag[i] - ai[i]; 245 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 246 while (nz--) { 247 idx = 3*(*vi++); 248 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 249 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 250 v += 9; 251 } 252 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 253 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 254 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 255 idiag += 9; 256 i2 += 3; 257 } 258 break; 259 case 4: 260 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 261 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 262 i2 = 4; 263 idiag += 16; 264 for (i=1; i<m; i++) { 265 v = aa + 16*ai[i]; 266 vi = aj + ai[i]; 267 nz = diag[i] - ai[i]; 268 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 269 while (nz--) { 270 idx = 4*(*vi++); 271 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 272 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 273 v += 16; 274 } 275 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 276 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 277 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 278 idiag += 16; 279 i2 += 4; 280 } 281 break; 282 case 5: 283 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 284 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 285 i2 = 5; 286 idiag += 25; 287 for (i=1; i<m; i++) { 288 v = aa + 25*ai[i]; 289 vi = aj + ai[i]; 290 nz = diag[i] - ai[i]; 291 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 292 while (nz--) { 293 idx = 5*(*vi++); 294 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 295 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 296 v += 25; 297 } 298 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 299 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 300 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 301 idiag += 25; 302 i2 += 5; 303 } 304 break; 305 case 6: 306 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 307 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 308 i2 = 6; 309 idiag += 36; 310 for (i=1; i<m; i++) { 311 v = aa + 36*ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 315 while (nz--) { 316 idx = 6*(*vi++); 317 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 318 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 319 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 320 v += 36; 321 } 322 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 323 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 324 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 325 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 326 idiag += 36; 327 i2 += 6; 328 } 329 break; 330 case 7: 331 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 332 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 333 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 334 i2 = 7; 335 idiag += 49; 336 for (i=1; i<m; i++) { 337 v = aa + 49*ai[i]; 338 vi = aj + ai[i]; 339 nz = diag[i] - ai[i]; 340 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 341 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 342 while (nz--) { 343 idx = 7*(*vi++); 344 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 345 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 346 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 347 v += 49; 348 } 349 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 350 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 351 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 352 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 353 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 354 idiag += 49; 355 i2 += 7; 356 } 357 break; 358 default: 359 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 360 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 i2 = bs; 362 idiag += bs2; 363 for (i=1; i<m; i++) { 364 v = aa + bs2*ai[i]; 365 vi = aj + ai[i]; 366 nz = diag[i] - ai[i]; 367 368 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 369 /* copy all rows of x that are needed into contiguous space */ 370 workt = work; 371 for (j=0; j<nz; j++) { 372 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 373 workt += bs; 374 } 375 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 376 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 377 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 378 379 idiag += bs2; 380 i2 += bs; 381 } 382 break; 383 } 384 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 385 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 386 xb = t; 387 } 388 else xb = b; 389 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 390 idiag = a->idiag+bs2*(a->mbs-1); 391 i2 = bs * (m-1); 392 switch (bs) { 393 case 1: 394 s[0] = xb[i2]; 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 i2 -= 1; 398 for (i=m-2; i>=0; i--) { 399 v = aa + (diag[i]+1); 400 vi = aj + diag[i] + 1; 401 nz = ai[i+1] - diag[i] - 1; 402 s[0] = xb[i2]; 403 for (j=0; j<nz; j++) { 404 xw[0] = x[vi[j]]; 405 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 406 } 407 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 408 x[i2] = xw[0]; 409 idiag -= 1; 410 i2 -= 1; 411 } 412 break; 413 case 2: 414 s[0] = xb[i2]; s[1] = xb[i2+1]; 415 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 416 x[i2] = xw[0]; x[i2+1] = xw[1]; 417 i2 -= 2; 418 idiag -= 4; 419 for (i=m-2; i>=0; i--) { 420 v = aa + 4*(diag[i] + 1); 421 vi = aj + diag[i] + 1; 422 nz = ai[i+1] - diag[i] - 1; 423 s[0] = xb[i2]; s[1] = xb[i2+1]; 424 for (j=0; j<nz; j++) { 425 idx = 2*vi[j]; 426 it = 4*j; 427 xw[0] = x[idx]; xw[1] = x[1+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 429 } 430 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 431 x[i2] = xw[0]; x[i2+1] = xw[1]; 432 idiag -= 4; 433 i2 -= 2; 434 } 435 break; 436 case 3: 437 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 438 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 439 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 440 i2 -= 3; 441 idiag -= 9; 442 for (i=m-2; i>=0; i--) { 443 v = aa + 9*(diag[i]+1); 444 vi = aj + diag[i] + 1; 445 nz = ai[i+1] - diag[i] - 1; 446 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 447 while (nz--) { 448 idx = 3*(*vi++); 449 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 450 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 451 v += 9; 452 } 453 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 454 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 455 idiag -= 9; 456 i2 -= 3; 457 } 458 break; 459 case 4: 460 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 461 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 462 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 463 i2 -= 4; 464 idiag -= 16; 465 for (i=m-2; i>=0; i--) { 466 v = aa + 16*(diag[i]+1); 467 vi = aj + diag[i] + 1; 468 nz = ai[i+1] - diag[i] - 1; 469 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 473 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 474 v += 16; 475 } 476 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 477 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 478 idiag -= 16; 479 i2 -= 4; 480 } 481 break; 482 case 5: 483 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 484 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 485 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 486 i2 -= 5; 487 idiag -= 25; 488 for (i=m-2; i>=0; i--) { 489 v = aa + 25*(diag[i]+1); 490 vi = aj + diag[i] + 1; 491 nz = ai[i+1] - diag[i] - 1; 492 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 493 while (nz--) { 494 idx = 5*(*vi++); 495 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 497 v += 25; 498 } 499 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 500 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 501 idiag -= 25; 502 i2 -= 5; 503 } 504 break; 505 case 6: 506 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 507 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 508 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 509 i2 -= 6; 510 idiag -= 36; 511 for (i=m-2; i>=0; i--) { 512 v = aa + 36*(diag[i]+1); 513 vi = aj + diag[i] + 1; 514 nz = ai[i+1] - diag[i] - 1; 515 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 516 while (nz--) { 517 idx = 6*(*vi++); 518 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 519 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 520 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 521 v += 36; 522 } 523 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 524 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 525 idiag -= 36; 526 i2 -= 6; 527 } 528 break; 529 case 7: 530 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 531 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 532 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 533 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 534 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 535 i2 -= 7; 536 idiag -= 49; 537 for (i=m-2; i>=0; i--) { 538 v = aa + 49*(diag[i]+1); 539 vi = aj + diag[i] + 1; 540 nz = ai[i+1] - diag[i] - 1; 541 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 542 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 543 while (nz--) { 544 idx = 7*(*vi++); 545 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 546 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 547 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 548 v += 49; 549 } 550 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 551 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 552 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 553 idiag -= 49; 554 i2 -= 7; 555 } 556 break; 557 default: 558 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 559 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 560 i2 -= bs; 561 idiag -= bs2; 562 for (i=m-2; i>=0; i--) { 563 v = aa + bs2*(diag[i]+1); 564 vi = aj + diag[i] + 1; 565 nz = ai[i+1] - diag[i] - 1; 566 567 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 568 /* copy all rows of x that are needed into contiguous space */ 569 workt = work; 570 for (j=0; j<nz; j++) { 571 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 572 workt += bs; 573 } 574 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 575 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 576 577 idiag -= bs2; 578 i2 -= bs; 579 } 580 break; 581 } 582 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 583 } 584 its--; 585 } 586 while (its--) { 587 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 588 idiag = a->idiag; 589 i2 = 0; 590 switch (bs) { 591 case 1: 592 for (i=0; i<m; i++) { 593 v = aa + ai[i]; 594 vi = aj + ai[i]; 595 nz = ai[i+1] - ai[i]; 596 s[0] = b[i2]; 597 for (j=0; j<nz; j++) { 598 xw[0] = x[vi[j]]; 599 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 600 } 601 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 602 x[i2] += xw[0]; 603 idiag += 1; 604 i2 += 1; 605 } 606 break; 607 case 2: 608 for (i=0; i<m; i++) { 609 v = aa + 4*ai[i]; 610 vi = aj + ai[i]; 611 nz = ai[i+1] - ai[i]; 612 s[0] = b[i2]; s[1] = b[i2+1]; 613 for (j=0; j<nz; j++) { 614 idx = 2*vi[j]; 615 it = 4*j; 616 xw[0] = x[idx]; xw[1] = x[1+idx]; 617 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 618 } 619 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 620 x[i2] += xw[0]; x[i2+1] += xw[1]; 621 idiag += 4; 622 i2 += 2; 623 } 624 break; 625 case 3: 626 for (i=0; i<m; i++) { 627 v = aa + 9*ai[i]; 628 vi = aj + ai[i]; 629 nz = ai[i+1] - ai[i]; 630 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 631 while (nz--) { 632 idx = 3*(*vi++); 633 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 634 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 635 v += 9; 636 } 637 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 638 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 639 idiag += 9; 640 i2 += 3; 641 } 642 break; 643 case 4: 644 for (i=0; i<m; i++) { 645 v = aa + 16*ai[i]; 646 vi = aj + ai[i]; 647 nz = ai[i+1] - ai[i]; 648 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 649 while (nz--) { 650 idx = 4*(*vi++); 651 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 652 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 653 v += 16; 654 } 655 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 656 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 657 idiag += 16; 658 i2 += 4; 659 } 660 break; 661 case 5: 662 for (i=0; i<m; i++) { 663 v = aa + 25*ai[i]; 664 vi = aj + ai[i]; 665 nz = ai[i+1] - ai[i]; 666 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 667 while (nz--) { 668 idx = 5*(*vi++); 669 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 670 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 671 v += 25; 672 } 673 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 674 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 675 idiag += 25; 676 i2 += 5; 677 } 678 break; 679 case 6: 680 for (i=0; i<m; i++) { 681 v = aa + 36*ai[i]; 682 vi = aj + ai[i]; 683 nz = ai[i+1] - ai[i]; 684 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 685 while (nz--) { 686 idx = 6*(*vi++); 687 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 688 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 689 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 690 v += 36; 691 } 692 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 693 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 694 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 695 idiag += 36; 696 i2 += 6; 697 } 698 break; 699 case 7: 700 for (i=0; i<m; i++) { 701 v = aa + 49*ai[i]; 702 vi = aj + ai[i]; 703 nz = ai[i+1] - ai[i]; 704 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 705 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 706 while (nz--) { 707 idx = 7*(*vi++); 708 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 709 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 710 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 711 v += 49; 712 } 713 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 714 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 715 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 716 idiag += 49; 717 i2 += 7; 718 } 719 break; 720 default: 721 for (i=0; i<m; i++) { 722 v = aa + bs2*ai[i]; 723 vi = aj + ai[i]; 724 nz = ai[i+1] - ai[i]; 725 726 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 727 /* copy all rows of x that are needed into contiguous space */ 728 workt = work; 729 for (j=0; j<nz; j++) { 730 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 731 workt += bs; 732 } 733 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 734 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 735 736 idiag += bs2; 737 i2 += bs; 738 } 739 break; 740 } 741 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 742 } 743 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 744 idiag = a->idiag+bs2*(a->mbs-1); 745 i2 = bs * (m-1); 746 switch (bs) { 747 case 1: 748 for (i=m-1; i>=0; i--) { 749 v = aa + ai[i]; 750 vi = aj + ai[i]; 751 nz = ai[i+1] - ai[i]; 752 s[0] = b[i2]; 753 for (j=0; j<nz; j++) { 754 xw[0] = x[vi[j]]; 755 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 756 } 757 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 758 x[i2] += xw[0]; 759 idiag -= 1; 760 i2 -= 1; 761 } 762 break; 763 case 2: 764 for (i=m-1; i>=0; i--) { 765 v = aa + 4*ai[i]; 766 vi = aj + ai[i]; 767 nz = ai[i+1] - ai[i]; 768 s[0] = b[i2]; s[1] = b[i2+1]; 769 for (j=0; j<nz; j++) { 770 idx = 2*vi[j]; 771 it = 4*j; 772 xw[0] = x[idx]; xw[1] = x[1+idx]; 773 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 774 } 775 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 776 x[i2] += xw[0]; x[i2+1] += xw[1]; 777 idiag -= 4; 778 i2 -= 2; 779 } 780 break; 781 case 3: 782 for (i=m-1; i>=0; i--) { 783 v = aa + 9*ai[i]; 784 vi = aj + ai[i]; 785 nz = ai[i+1] - ai[i]; 786 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 787 while (nz--) { 788 idx = 3*(*vi++); 789 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 790 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 791 v += 9; 792 } 793 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 794 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 795 idiag -= 9; 796 i2 -= 3; 797 } 798 break; 799 case 4: 800 for (i=m-1; i>=0; i--) { 801 v = aa + 16*ai[i]; 802 vi = aj + ai[i]; 803 nz = ai[i+1] - ai[i]; 804 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 805 while (nz--) { 806 idx = 4*(*vi++); 807 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 808 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 809 v += 16; 810 } 811 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 812 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 813 idiag -= 16; 814 i2 -= 4; 815 } 816 break; 817 case 5: 818 for (i=m-1; i>=0; i--) { 819 v = aa + 25*ai[i]; 820 vi = aj + ai[i]; 821 nz = ai[i+1] - ai[i]; 822 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 823 while (nz--) { 824 idx = 5*(*vi++); 825 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 826 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 827 v += 25; 828 } 829 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 830 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 831 idiag -= 25; 832 i2 -= 5; 833 } 834 break; 835 case 6: 836 for (i=m-1; i>=0; i--) { 837 v = aa + 36*ai[i]; 838 vi = aj + ai[i]; 839 nz = ai[i+1] - ai[i]; 840 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 841 while (nz--) { 842 idx = 6*(*vi++); 843 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 844 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 845 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 846 v += 36; 847 } 848 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 849 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 850 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 851 idiag -= 36; 852 i2 -= 6; 853 } 854 break; 855 case 7: 856 for (i=m-1; i>=0; i--) { 857 v = aa + 49*ai[i]; 858 vi = aj + ai[i]; 859 nz = ai[i+1] - ai[i]; 860 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 861 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 862 while (nz--) { 863 idx = 7*(*vi++); 864 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 865 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 866 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 867 v += 49; 868 } 869 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 870 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 871 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 872 idiag -= 49; 873 i2 -= 7; 874 } 875 break; 876 default: 877 for (i=m-1; i>=0; i--) { 878 v = aa + bs2*ai[i]; 879 vi = aj + ai[i]; 880 nz = ai[i+1] - ai[i]; 881 882 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 883 /* copy all rows of x that are needed into contiguous space */ 884 workt = work; 885 for (j=0; j<nz; j++) { 886 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 887 workt += bs; 888 } 889 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 890 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 891 892 idiag -= bs2; 893 i2 -= bs; 894 } 895 break; 896 } 897 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 898 } 899 } 900 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 901 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 906 /* 907 Special version for direct calls from Fortran (Used in PETSc-fun3d) 908 */ 909 #if defined(PETSC_HAVE_FORTRAN_CAPS) 910 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 911 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 912 #define matsetvaluesblocked4_ matsetvaluesblocked4 913 #endif 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "matsetvaluesblocked4_" 917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 918 { 919 Mat A = *AA; 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 921 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 922 PetscInt *ai =a->i,*ailen=a->ilen; 923 PetscInt *aj =a->j,stepval,lastcol = -1; 924 const PetscScalar *value = v; 925 MatScalar *ap,*aa = a->a,*bap; 926 927 PetscFunctionBegin; 928 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 929 stepval = (n-1)*4; 930 for (k=0; k<m; k++) { /* loop over added rows */ 931 row = im[k]; 932 rp = aj + ai[row]; 933 ap = aa + 16*ai[row]; 934 nrow = ailen[row]; 935 low = 0; 936 high = nrow; 937 for (l=0; l<n; l++) { /* loop over added columns */ 938 col = in[l]; 939 if (col <= lastcol) low = 0; 940 else high = nrow; 941 lastcol = col; 942 value = v + k*(stepval+4 + l)*4; 943 while (high-low > 7) { 944 t = (low+high)/2; 945 if (rp[t] > col) high = t; 946 else low = t; 947 } 948 for (i=low; i<high; i++) { 949 if (rp[i] > col) break; 950 if (rp[i] == col) { 951 bap = ap + 16*i; 952 for (ii=0; ii<4; ii++,value+=stepval) { 953 for (jj=ii; jj<16; jj+=4) { 954 bap[jj] += *value++; 955 } 956 } 957 goto noinsert2; 958 } 959 } 960 N = nrow++ - 1; 961 high++; /* added new column index thus must search to one higher than before */ 962 /* shift up all the later entries in this row */ 963 for (ii=N; ii>=i; ii--) { 964 rp[ii+1] = rp[ii]; 965 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 966 } 967 if (N >= i) { 968 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 969 } 970 rp[i] = col; 971 bap = ap + 16*i; 972 for (ii=0; ii<4; ii++,value+=stepval) { 973 for (jj=ii; jj<16; jj+=4) { 974 bap[jj] = *value++; 975 } 976 } 977 noinsert2:; 978 low = i; 979 } 980 ailen[row] = nrow; 981 } 982 PetscFunctionReturnVoid(); 983 } 984 985 #if defined(PETSC_HAVE_FORTRAN_CAPS) 986 #define matsetvalues4_ MATSETVALUES4 987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 988 #define matsetvalues4_ matsetvalues4 989 #endif 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatSetValues4_" 993 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 994 { 995 Mat A = *AA; 996 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 997 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 998 PetscInt *ai=a->i,*ailen=a->ilen; 999 PetscInt *aj=a->j,brow,bcol; 1000 PetscInt ridx,cidx,lastcol = -1; 1001 MatScalar *ap,value,*aa=a->a,*bap; 1002 1003 PetscFunctionBegin; 1004 for (k=0; k<m; k++) { /* loop over added rows */ 1005 row = im[k]; brow = row/4; 1006 rp = aj + ai[brow]; 1007 ap = aa + 16*ai[brow]; 1008 nrow = ailen[brow]; 1009 low = 0; 1010 high = nrow; 1011 for (l=0; l<n; l++) { /* loop over added columns */ 1012 col = in[l]; bcol = col/4; 1013 ridx = row % 4; cidx = col % 4; 1014 value = v[l + k*n]; 1015 if (col <= lastcol) low = 0; 1016 else high = nrow; 1017 lastcol = col; 1018 while (high-low > 7) { 1019 t = (low+high)/2; 1020 if (rp[t] > bcol) high = t; 1021 else low = t; 1022 } 1023 for (i=low; i<high; i++) { 1024 if (rp[i] > bcol) break; 1025 if (rp[i] == bcol) { 1026 bap = ap + 16*i + 4*cidx + ridx; 1027 *bap += value; 1028 goto noinsert1; 1029 } 1030 } 1031 N = nrow++ - 1; 1032 high++; /* added new column thus must search to one higher than before */ 1033 /* shift up all the later entries in this row */ 1034 for (ii=N; ii>=i; ii--) { 1035 rp[ii+1] = rp[ii]; 1036 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1037 } 1038 if (N>=i) { 1039 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1040 } 1041 rp[i] = bcol; 1042 ap[16*i + 4*cidx + ridx] = value; 1043 noinsert1:; 1044 low = i; 1045 } 1046 ailen[brow] = nrow; 1047 } 1048 PetscFunctionReturnVoid(); 1049 } 1050 1051 /* 1052 Checks for missing diagonals 1053 */ 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1057 { 1058 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1059 PetscErrorCode ierr; 1060 PetscInt *diag,*ii = a->i,i; 1061 1062 PetscFunctionBegin; 1063 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1064 *missing = PETSC_FALSE; 1065 if (A->rmap->n > 0 && !ii) { 1066 *missing = PETSC_TRUE; 1067 if (d) *d = 0; 1068 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1069 } else { 1070 diag = a->diag; 1071 for (i=0; i<a->mbs; i++) { 1072 if (diag[i] >= ii[i+1]) { 1073 *missing = PETSC_TRUE; 1074 if (d) *d = i; 1075 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1076 break; 1077 } 1078 } 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1085 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1086 { 1087 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1088 PetscErrorCode ierr; 1089 PetscInt i,j,m = a->mbs; 1090 1091 PetscFunctionBegin; 1092 if (!a->diag) { 1093 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1094 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1095 a->free_diag = PETSC_TRUE; 1096 } 1097 for (i=0; i<m; i++) { 1098 a->diag[i] = a->i[i+1]; 1099 for (j=a->i[i]; j<a->i[i+1]; j++) { 1100 if (a->j[j] == i) { 1101 a->diag[i] = j; 1102 break; 1103 } 1104 } 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1112 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1113 { 1114 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1115 PetscErrorCode ierr; 1116 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1117 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1118 1119 PetscFunctionBegin; 1120 *nn = n; 1121 if (!ia) PetscFunctionReturn(0); 1122 if (symmetric) { 1123 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1124 nz = tia[n]; 1125 } else { 1126 tia = a->i; tja = a->j; 1127 } 1128 1129 if (!blockcompressed && bs > 1) { 1130 (*nn) *= bs; 1131 /* malloc & create the natural set of indices */ 1132 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1133 if (n) { 1134 (*ia)[0] = 0; 1135 for (j=1; j<bs; j++) { 1136 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1137 } 1138 } 1139 1140 for (i=1; i<n; i++) { 1141 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1142 for (j=1; j<bs; j++) { 1143 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1144 } 1145 } 1146 if (n) { 1147 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1148 } 1149 1150 if (inja) { 1151 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1152 cnt = 0; 1153 for (i=0; i<n; i++) { 1154 for (j=0; j<bs; j++) { 1155 for (k=tia[i]; k<tia[i+1]; k++) { 1156 for (l=0; l<bs; l++) { 1157 (*ja)[cnt++] = bs*tja[k] + l; 1158 } 1159 } 1160 } 1161 } 1162 } 1163 1164 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1165 ierr = PetscFree(tia);CHKERRQ(ierr); 1166 ierr = PetscFree(tja);CHKERRQ(ierr); 1167 } 1168 } else if (oshift == 1) { 1169 if (symmetric) { 1170 nz = tia[A->rmap->n/bs]; 1171 /* add 1 to i and j indices */ 1172 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1173 *ia = tia; 1174 if (ja) { 1175 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1176 *ja = tja; 1177 } 1178 } else { 1179 nz = a->i[A->rmap->n/bs]; 1180 /* malloc space and add 1 to i and j indices */ 1181 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1182 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1183 if (ja) { 1184 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1185 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1186 } 1187 } 1188 } else { 1189 *ia = tia; 1190 if (ja) *ja = tja; 1191 } 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1197 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1198 { 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 if (!ia) PetscFunctionReturn(0); 1203 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1204 ierr = PetscFree(*ia);CHKERRQ(ierr); 1205 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1212 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1213 { 1214 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 #if defined(PETSC_USE_LOG) 1219 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1220 #endif 1221 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1222 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1223 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1224 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1225 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1226 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1227 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1228 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1229 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1230 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1231 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1232 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1233 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1234 1235 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1236 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1237 ierr = PetscFree(A->data);CHKERRQ(ierr); 1238 1239 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1255 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1256 { 1257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 switch (op) { 1262 case MAT_ROW_ORIENTED: 1263 a->roworiented = flg; 1264 break; 1265 case MAT_KEEP_NONZERO_PATTERN: 1266 a->keepnonzeropattern = flg; 1267 break; 1268 case MAT_NEW_NONZERO_LOCATIONS: 1269 a->nonew = (flg ? 0 : 1); 1270 break; 1271 case MAT_NEW_NONZERO_LOCATION_ERR: 1272 a->nonew = (flg ? -1 : 0); 1273 break; 1274 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1275 a->nonew = (flg ? -2 : 0); 1276 break; 1277 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1278 a->nounused = (flg ? -1 : 0); 1279 break; 1280 case MAT_NEW_DIAGONALS: 1281 case MAT_IGNORE_OFF_PROC_ENTRIES: 1282 case MAT_USE_HASH_TABLE: 1283 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1284 break; 1285 case MAT_SPD: 1286 case MAT_SYMMETRIC: 1287 case MAT_STRUCTURALLY_SYMMETRIC: 1288 case MAT_HERMITIAN: 1289 case MAT_SYMMETRY_ETERNAL: 1290 /* These options are handled directly by MatSetOption() */ 1291 break; 1292 default: 1293 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1301 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1302 { 1303 PetscErrorCode ierr; 1304 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1305 MatScalar *aa_i; 1306 PetscScalar *v_i; 1307 1308 PetscFunctionBegin; 1309 bs = A->rmap->bs; 1310 bs2 = bs*bs; 1311 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1312 1313 bn = row/bs; /* Block number */ 1314 bp = row % bs; /* Block Position */ 1315 M = ai[bn+1] - ai[bn]; 1316 *nz = bs*M; 1317 1318 if (v) { 1319 *v = 0; 1320 if (*nz) { 1321 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1322 for (i=0; i<M; i++) { /* for each block in the block row */ 1323 v_i = *v + i*bs; 1324 aa_i = aa + bs2*(ai[bn] + i); 1325 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1326 } 1327 } 1328 } 1329 1330 if (idx) { 1331 *idx = 0; 1332 if (*nz) { 1333 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1334 for (i=0; i<M; i++) { /* for each block in the block row */ 1335 idx_i = *idx + i*bs; 1336 itmp = bs*aj[ai[bn] + i]; 1337 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1338 } 1339 } 1340 } 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1346 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1347 { 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1358 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1359 { 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1364 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1365 PetscFunctionReturn(0); 1366 } 1367 1368 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1372 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1373 { 1374 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1375 Mat C; 1376 PetscErrorCode ierr; 1377 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1378 PetscInt *rows,*cols,bs2=a->bs2; 1379 MatScalar *array; 1380 1381 PetscFunctionBegin; 1382 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1383 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1384 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1385 1386 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1387 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1388 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1389 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1390 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1391 ierr = PetscFree(col);CHKERRQ(ierr); 1392 } else { 1393 C = *B; 1394 } 1395 1396 array = a->a; 1397 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1398 for (i=0; i<mbs; i++) { 1399 cols[0] = i*bs; 1400 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1401 len = ai[i+1] - ai[i]; 1402 for (j=0; j<len; j++) { 1403 rows[0] = (*aj++)*bs; 1404 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1405 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1406 array += bs2; 1407 } 1408 } 1409 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1410 1411 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1413 1414 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1415 *B = C; 1416 } else { 1417 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1424 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1425 { 1426 PetscErrorCode ierr; 1427 Mat Btrans; 1428 1429 PetscFunctionBegin; 1430 *f = PETSC_FALSE; 1431 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1432 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1433 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1439 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1440 { 1441 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1442 PetscErrorCode ierr; 1443 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1444 int fd; 1445 PetscScalar *aa; 1446 FILE *file; 1447 1448 PetscFunctionBegin; 1449 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1450 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1451 col_lens[0] = MAT_FILE_CLASSID; 1452 1453 col_lens[1] = A->rmap->N; 1454 col_lens[2] = A->cmap->n; 1455 col_lens[3] = a->nz*bs2; 1456 1457 /* store lengths of each row and write (including header) to file */ 1458 count = 0; 1459 for (i=0; i<a->mbs; i++) { 1460 for (j=0; j<bs; j++) { 1461 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1462 } 1463 } 1464 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1465 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1466 1467 /* store column indices (zero start index) */ 1468 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1469 count = 0; 1470 for (i=0; i<a->mbs; i++) { 1471 for (j=0; j<bs; j++) { 1472 for (k=a->i[i]; k<a->i[i+1]; k++) { 1473 for (l=0; l<bs; l++) { 1474 jj[count++] = bs*a->j[k] + l; 1475 } 1476 } 1477 } 1478 } 1479 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscFree(jj);CHKERRQ(ierr); 1481 1482 /* store nonzero values */ 1483 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1484 count = 0; 1485 for (i=0; i<a->mbs; i++) { 1486 for (j=0; j<bs; j++) { 1487 for (k=a->i[i]; k<a->i[i+1]; k++) { 1488 for (l=0; l<bs; l++) { 1489 aa[count++] = a->a[bs2*k + l*bs + j]; 1490 } 1491 } 1492 } 1493 } 1494 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1495 ierr = PetscFree(aa);CHKERRQ(ierr); 1496 1497 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1498 if (file) { 1499 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1506 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1507 { 1508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1509 PetscErrorCode ierr; 1510 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1511 PetscViewerFormat format; 1512 1513 PetscFunctionBegin; 1514 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1515 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1516 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1517 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1518 const char *matname; 1519 Mat aij; 1520 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1521 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1522 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1523 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1524 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1525 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1526 PetscFunctionReturn(0); 1527 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1528 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1529 for (i=0; i<a->mbs; i++) { 1530 for (j=0; j<bs; j++) { 1531 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1532 for (k=a->i[i]; k<a->i[i+1]; k++) { 1533 for (l=0; l<bs; l++) { 1534 #if defined(PETSC_USE_COMPLEX) 1535 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1537 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1540 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } 1544 #else 1545 if (a->a[bs2*k + l*bs + j] != 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1547 } 1548 #endif 1549 } 1550 } 1551 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1552 } 1553 } 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1555 } else { 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1557 for (i=0; i<a->mbs; i++) { 1558 for (j=0; j<bs; j++) { 1559 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1560 for (k=a->i[i]; k<a->i[i+1]; k++) { 1561 for (l=0; l<bs; l++) { 1562 #if defined(PETSC_USE_COMPLEX) 1563 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1565 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1568 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } 1572 #else 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1574 #endif 1575 } 1576 } 1577 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1578 } 1579 } 1580 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1581 } 1582 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #include <petscdraw.h> 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1590 { 1591 Mat A = (Mat) Aa; 1592 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1595 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1596 MatScalar *aa; 1597 PetscViewer viewer; 1598 PetscViewerFormat format; 1599 1600 PetscFunctionBegin; 1601 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1602 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1603 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1604 1605 /* loop over matrix elements drawing boxes */ 1606 1607 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1608 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1609 /* Blue for negative, Cyan for zero and Red for positive */ 1610 color = PETSC_DRAW_BLUE; 1611 for (i=0,row=0; i<mbs; i++,row+=bs) { 1612 for (j=a->i[i]; j<a->i[i+1]; j++) { 1613 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1614 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1615 aa = a->a + j*bs2; 1616 for (k=0; k<bs; k++) { 1617 for (l=0; l<bs; l++) { 1618 if (PetscRealPart(*aa++) >= 0.) continue; 1619 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1620 } 1621 } 1622 } 1623 } 1624 color = PETSC_DRAW_CYAN; 1625 for (i=0,row=0; i<mbs; i++,row+=bs) { 1626 for (j=a->i[i]; j<a->i[i+1]; j++) { 1627 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1628 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1629 aa = a->a + j*bs2; 1630 for (k=0; k<bs; k++) { 1631 for (l=0; l<bs; l++) { 1632 if (PetscRealPart(*aa++) != 0.) continue; 1633 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1634 } 1635 } 1636 } 1637 } 1638 color = PETSC_DRAW_RED; 1639 for (i=0,row=0; i<mbs; i++,row+=bs) { 1640 for (j=a->i[i]; j<a->i[i+1]; j++) { 1641 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1643 aa = a->a + j*bs2; 1644 for (k=0; k<bs; k++) { 1645 for (l=0; l<bs; l++) { 1646 if (PetscRealPart(*aa++) <= 0.) continue; 1647 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1648 } 1649 } 1650 } 1651 } 1652 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1653 } else { 1654 /* use contour shading to indicate magnitude of values */ 1655 /* first determine max of all nonzero values */ 1656 PetscReal minv = 0.0, maxv = 0.0; 1657 PetscDraw popup; 1658 1659 for (i=0; i<a->nz*a->bs2; i++) { 1660 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1661 } 1662 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1663 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1664 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1665 1666 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1667 for (i=0,row=0; i<mbs; i++,row+=bs) { 1668 for (j=a->i[i]; j<a->i[i+1]; j++) { 1669 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1670 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1671 aa = a->a + j*bs2; 1672 for (k=0; k<bs; k++) { 1673 for (l=0; l<bs; l++) { 1674 MatScalar v = *aa++; 1675 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1676 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1688 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1689 { 1690 PetscErrorCode ierr; 1691 PetscReal xl,yl,xr,yr,w,h; 1692 PetscDraw draw; 1693 PetscBool isnull; 1694 1695 PetscFunctionBegin; 1696 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1697 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1698 1699 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1700 xr += w; yr += h; xl = -w; yl = -h; 1701 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1703 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1705 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatView_SeqBAIJ" 1711 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1712 { 1713 PetscErrorCode ierr; 1714 PetscBool iascii,isbinary,isdraw; 1715 1716 PetscFunctionBegin; 1717 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1718 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1719 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1720 if (iascii) { 1721 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1722 } else if (isbinary) { 1723 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1724 } else if (isdraw) { 1725 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1726 } else { 1727 Mat B; 1728 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1729 ierr = MatView(B,viewer);CHKERRQ(ierr); 1730 ierr = MatDestroy(&B);CHKERRQ(ierr); 1731 } 1732 PetscFunctionReturn(0); 1733 } 1734 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1738 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1739 { 1740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1741 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1742 PetscInt *ai = a->i,*ailen = a->ilen; 1743 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1744 MatScalar *ap,*aa = a->a; 1745 1746 PetscFunctionBegin; 1747 for (k=0; k<m; k++) { /* loop over rows */ 1748 row = im[k]; brow = row/bs; 1749 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1750 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1751 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1752 nrow = ailen[brow]; 1753 for (l=0; l<n; l++) { /* loop over columns */ 1754 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1755 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1756 col = in[l]; 1757 bcol = col/bs; 1758 cidx = col%bs; 1759 ridx = row%bs; 1760 high = nrow; 1761 low = 0; /* assume unsorted */ 1762 while (high-low > 5) { 1763 t = (low+high)/2; 1764 if (rp[t] > bcol) high = t; 1765 else low = t; 1766 } 1767 for (i=low; i<high; i++) { 1768 if (rp[i] > bcol) break; 1769 if (rp[i] == bcol) { 1770 *v++ = ap[bs2*i+bs*cidx+ridx]; 1771 goto finished; 1772 } 1773 } 1774 *v++ = 0.0; 1775 finished:; 1776 } 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1783 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1784 { 1785 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1786 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1787 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1788 PetscErrorCode ierr; 1789 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1790 PetscBool roworiented=a->roworiented; 1791 const PetscScalar *value = v; 1792 MatScalar *ap,*aa = a->a,*bap; 1793 1794 PetscFunctionBegin; 1795 if (roworiented) { 1796 stepval = (n-1)*bs; 1797 } else { 1798 stepval = (m-1)*bs; 1799 } 1800 for (k=0; k<m; k++) { /* loop over added rows */ 1801 row = im[k]; 1802 if (row < 0) continue; 1803 #if defined(PETSC_USE_DEBUG) 1804 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1805 #endif 1806 rp = aj + ai[row]; 1807 ap = aa + bs2*ai[row]; 1808 rmax = imax[row]; 1809 nrow = ailen[row]; 1810 low = 0; 1811 high = nrow; 1812 for (l=0; l<n; l++) { /* loop over added columns */ 1813 if (in[l] < 0) continue; 1814 #if defined(PETSC_USE_DEBUG) 1815 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1816 #endif 1817 col = in[l]; 1818 if (roworiented) { 1819 value = v + (k*(stepval+bs) + l)*bs; 1820 } else { 1821 value = v + (l*(stepval+bs) + k)*bs; 1822 } 1823 if (col <= lastcol) low = 0; 1824 else high = nrow; 1825 lastcol = col; 1826 while (high-low > 7) { 1827 t = (low+high)/2; 1828 if (rp[t] > col) high = t; 1829 else low = t; 1830 } 1831 for (i=low; i<high; i++) { 1832 if (rp[i] > col) break; 1833 if (rp[i] == col) { 1834 bap = ap + bs2*i; 1835 if (roworiented) { 1836 if (is == ADD_VALUES) { 1837 for (ii=0; ii<bs; ii++,value+=stepval) { 1838 for (jj=ii; jj<bs2; jj+=bs) { 1839 bap[jj] += *value++; 1840 } 1841 } 1842 } else { 1843 for (ii=0; ii<bs; ii++,value+=stepval) { 1844 for (jj=ii; jj<bs2; jj+=bs) { 1845 bap[jj] = *value++; 1846 } 1847 } 1848 } 1849 } else { 1850 if (is == ADD_VALUES) { 1851 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1852 for (jj=0; jj<bs; jj++) { 1853 bap[jj] += value[jj]; 1854 } 1855 bap += bs; 1856 } 1857 } else { 1858 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1859 for (jj=0; jj<bs; jj++) { 1860 bap[jj] = value[jj]; 1861 } 1862 bap += bs; 1863 } 1864 } 1865 } 1866 goto noinsert2; 1867 } 1868 } 1869 if (nonew == 1) goto noinsert2; 1870 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1871 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1872 N = nrow++ - 1; high++; 1873 /* shift up all the later entries in this row */ 1874 for (ii=N; ii>=i; ii--) { 1875 rp[ii+1] = rp[ii]; 1876 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1877 } 1878 if (N >= i) { 1879 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1880 } 1881 rp[i] = col; 1882 bap = ap + bs2*i; 1883 if (roworiented) { 1884 for (ii=0; ii<bs; ii++,value+=stepval) { 1885 for (jj=ii; jj<bs2; jj+=bs) { 1886 bap[jj] = *value++; 1887 } 1888 } 1889 } else { 1890 for (ii=0; ii<bs; ii++,value+=stepval) { 1891 for (jj=0; jj<bs; jj++) { 1892 *bap++ = *value++; 1893 } 1894 } 1895 } 1896 noinsert2:; 1897 low = i; 1898 } 1899 ailen[row] = nrow; 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1907 { 1908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1909 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1910 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1911 PetscErrorCode ierr; 1912 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1913 MatScalar *aa = a->a,*ap; 1914 PetscReal ratio=0.6; 1915 1916 PetscFunctionBegin; 1917 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1918 1919 if (m) rmax = ailen[0]; 1920 for (i=1; i<mbs; i++) { 1921 /* move each row back by the amount of empty slots (fshift) before it*/ 1922 fshift += imax[i-1] - ailen[i-1]; 1923 rmax = PetscMax(rmax,ailen[i]); 1924 if (fshift) { 1925 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1926 N = ailen[i]; 1927 for (j=0; j<N; j++) { 1928 ip[j-fshift] = ip[j]; 1929 1930 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1931 } 1932 } 1933 ai[i] = ai[i-1] + ailen[i-1]; 1934 } 1935 if (mbs) { 1936 fshift += imax[mbs-1] - ailen[mbs-1]; 1937 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1938 } 1939 1940 /* reset ilen and imax for each row */ 1941 a->nonzerorowcnt = 0; 1942 for (i=0; i<mbs; i++) { 1943 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1944 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1945 } 1946 a->nz = ai[mbs]; 1947 1948 /* diagonals may have moved, so kill the diagonal pointers */ 1949 a->idiagvalid = PETSC_FALSE; 1950 if (fshift && a->diag) { 1951 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1952 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1953 a->diag = 0; 1954 } 1955 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1956 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1957 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1958 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1959 1960 A->info.mallocs += a->reallocs; 1961 a->reallocs = 0; 1962 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1963 a->rmax = rmax; 1964 1965 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1966 PetscFunctionReturn(0); 1967 } 1968 1969 /* 1970 This function returns an array of flags which indicate the locations of contiguous 1971 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1972 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1973 Assume: sizes should be long enough to hold all the values. 1974 */ 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1977 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1978 { 1979 PetscInt i,j,k,row; 1980 PetscBool flg; 1981 1982 PetscFunctionBegin; 1983 for (i=0,j=0; i<n; j++) { 1984 row = idx[i]; 1985 if (row%bs!=0) { /* Not the begining of a block */ 1986 sizes[j] = 1; 1987 i++; 1988 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1989 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1990 i++; 1991 } else { /* Begining of the block, so check if the complete block exists */ 1992 flg = PETSC_TRUE; 1993 for (k=1; k<bs; k++) { 1994 if (row+k != idx[i+k]) { /* break in the block */ 1995 flg = PETSC_FALSE; 1996 break; 1997 } 1998 } 1999 if (flg) { /* No break in the bs */ 2000 sizes[j] = bs; 2001 i += bs; 2002 } else { 2003 sizes[j] = 1; 2004 i++; 2005 } 2006 } 2007 } 2008 *bs_max = j; 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2014 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2015 { 2016 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2017 PetscErrorCode ierr; 2018 PetscInt i,j,k,count,*rows; 2019 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2020 PetscScalar zero = 0.0; 2021 MatScalar *aa; 2022 const PetscScalar *xx; 2023 PetscScalar *bb; 2024 2025 PetscFunctionBegin; 2026 /* fix right hand side if needed */ 2027 if (x && b) { 2028 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2029 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2030 for (i=0; i<is_n; i++) { 2031 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2032 } 2033 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2034 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2035 } 2036 2037 /* Make a copy of the IS and sort it */ 2038 /* allocate memory for rows,sizes */ 2039 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2040 2041 /* copy IS values to rows, and sort them */ 2042 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2043 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2044 2045 if (baij->keepnonzeropattern) { 2046 for (i=0; i<is_n; i++) sizes[i] = 1; 2047 bs_max = is_n; 2048 } else { 2049 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2050 A->nonzerostate++; 2051 } 2052 2053 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2054 row = rows[j]; 2055 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2056 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2057 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2058 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2059 if (diag != (PetscScalar)0.0) { 2060 if (baij->ilen[row/bs] > 0) { 2061 baij->ilen[row/bs] = 1; 2062 baij->j[baij->i[row/bs]] = row/bs; 2063 2064 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2065 } 2066 /* Now insert all the diagonal values for this bs */ 2067 for (k=0; k<bs; k++) { 2068 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2069 } 2070 } else { /* (diag == 0.0) */ 2071 baij->ilen[row/bs] = 0; 2072 } /* end (diag == 0.0) */ 2073 } else { /* (sizes[i] != bs) */ 2074 #if defined(PETSC_USE_DEBUG) 2075 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2076 #endif 2077 for (k=0; k<count; k++) { 2078 aa[0] = zero; 2079 aa += bs; 2080 } 2081 if (diag != (PetscScalar)0.0) { 2082 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2083 } 2084 } 2085 } 2086 2087 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2088 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2094 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2095 { 2096 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2097 PetscErrorCode ierr; 2098 PetscInt i,j,k,count; 2099 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2100 PetscScalar zero = 0.0; 2101 MatScalar *aa; 2102 const PetscScalar *xx; 2103 PetscScalar *bb; 2104 PetscBool *zeroed,vecs = PETSC_FALSE; 2105 2106 PetscFunctionBegin; 2107 /* fix right hand side if needed */ 2108 if (x && b) { 2109 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2110 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2111 vecs = PETSC_TRUE; 2112 } 2113 2114 /* zero the columns */ 2115 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2116 for (i=0; i<is_n; i++) { 2117 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2118 zeroed[is_idx[i]] = PETSC_TRUE; 2119 } 2120 for (i=0; i<A->rmap->N; i++) { 2121 if (!zeroed[i]) { 2122 row = i/bs; 2123 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2124 for (k=0; k<bs; k++) { 2125 col = bs*baij->j[j] + k; 2126 if (zeroed[col]) { 2127 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2128 if (vecs) bb[i] -= aa[0]*xx[col]; 2129 aa[0] = 0.0; 2130 } 2131 } 2132 } 2133 } else if (vecs) bb[i] = diag*xx[i]; 2134 } 2135 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2136 if (vecs) { 2137 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2138 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2139 } 2140 2141 /* zero the rows */ 2142 for (i=0; i<is_n; i++) { 2143 row = is_idx[i]; 2144 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2145 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2146 for (k=0; k<count; k++) { 2147 aa[0] = zero; 2148 aa += bs; 2149 } 2150 if (diag != (PetscScalar)0.0) { 2151 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2152 } 2153 } 2154 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2160 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2161 { 2162 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2163 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2164 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2165 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2166 PetscErrorCode ierr; 2167 PetscInt ridx,cidx,bs2=a->bs2; 2168 PetscBool roworiented=a->roworiented; 2169 MatScalar *ap,value,*aa=a->a,*bap; 2170 2171 PetscFunctionBegin; 2172 for (k=0; k<m; k++) { /* loop over added rows */ 2173 row = im[k]; 2174 brow = row/bs; 2175 if (row < 0) continue; 2176 #if defined(PETSC_USE_DEBUG) 2177 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2178 #endif 2179 rp = aj + ai[brow]; 2180 ap = aa + bs2*ai[brow]; 2181 rmax = imax[brow]; 2182 nrow = ailen[brow]; 2183 low = 0; 2184 high = nrow; 2185 for (l=0; l<n; l++) { /* loop over added columns */ 2186 if (in[l] < 0) continue; 2187 #if defined(PETSC_USE_DEBUG) 2188 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2189 #endif 2190 col = in[l]; bcol = col/bs; 2191 ridx = row % bs; cidx = col % bs; 2192 if (roworiented) { 2193 value = v[l + k*n]; 2194 } else { 2195 value = v[k + l*m]; 2196 } 2197 if (col <= lastcol) low = 0; else high = nrow; 2198 lastcol = col; 2199 while (high-low > 7) { 2200 t = (low+high)/2; 2201 if (rp[t] > bcol) high = t; 2202 else low = t; 2203 } 2204 for (i=low; i<high; i++) { 2205 if (rp[i] > bcol) break; 2206 if (rp[i] == bcol) { 2207 bap = ap + bs2*i + bs*cidx + ridx; 2208 if (is == ADD_VALUES) *bap += value; 2209 else *bap = value; 2210 goto noinsert1; 2211 } 2212 } 2213 if (nonew == 1) goto noinsert1; 2214 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2215 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2216 N = nrow++ - 1; high++; 2217 /* shift up all the later entries in this row */ 2218 for (ii=N; ii>=i; ii--) { 2219 rp[ii+1] = rp[ii]; 2220 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2221 } 2222 if (N>=i) { 2223 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2224 } 2225 rp[i] = bcol; 2226 ap[bs2*i + bs*cidx + ridx] = value; 2227 a->nz++; 2228 A->nonzerostate++; 2229 noinsert1:; 2230 low = i; 2231 } 2232 ailen[brow] = nrow; 2233 } 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2239 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2240 { 2241 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2242 Mat outA; 2243 PetscErrorCode ierr; 2244 PetscBool row_identity,col_identity; 2245 2246 PetscFunctionBegin; 2247 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2248 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2249 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2250 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2251 2252 outA = inA; 2253 inA->factortype = MAT_FACTOR_LU; 2254 2255 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2256 2257 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2258 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2259 a->row = row; 2260 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2261 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2262 a->col = col; 2263 2264 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2265 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2266 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2267 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2268 2269 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2270 if (!a->solve_work) { 2271 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2272 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2273 } 2274 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2280 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2281 { 2282 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2283 PetscInt i,nz,mbs; 2284 2285 PetscFunctionBegin; 2286 nz = baij->maxnz; 2287 mbs = baij->mbs; 2288 for (i=0; i<nz; i++) { 2289 baij->j[i] = indices[i]; 2290 } 2291 baij->nz = nz; 2292 for (i=0; i<mbs; i++) { 2293 baij->ilen[i] = baij->imax[i]; 2294 } 2295 PetscFunctionReturn(0); 2296 } 2297 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2300 /*@ 2301 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2302 in the matrix. 2303 2304 Input Parameters: 2305 + mat - the SeqBAIJ matrix 2306 - indices - the column indices 2307 2308 Level: advanced 2309 2310 Notes: 2311 This can be called if you have precomputed the nonzero structure of the 2312 matrix and want to provide it to the matrix object to improve the performance 2313 of the MatSetValues() operation. 2314 2315 You MUST have set the correct numbers of nonzeros per row in the call to 2316 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2317 2318 MUST be called before any calls to MatSetValues(); 2319 2320 @*/ 2321 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2322 { 2323 PetscErrorCode ierr; 2324 2325 PetscFunctionBegin; 2326 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2327 PetscValidPointer(indices,2); 2328 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2329 PetscFunctionReturn(0); 2330 } 2331 2332 #undef __FUNCT__ 2333 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2334 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2335 { 2336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2337 PetscErrorCode ierr; 2338 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2339 PetscReal atmp; 2340 PetscScalar *x,zero = 0.0; 2341 MatScalar *aa; 2342 PetscInt ncols,brow,krow,kcol; 2343 2344 PetscFunctionBegin; 2345 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2346 bs = A->rmap->bs; 2347 aa = a->a; 2348 ai = a->i; 2349 aj = a->j; 2350 mbs = a->mbs; 2351 2352 ierr = VecSet(v,zero);CHKERRQ(ierr); 2353 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2354 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2355 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2356 for (i=0; i<mbs; i++) { 2357 ncols = ai[1] - ai[0]; ai++; 2358 brow = bs*i; 2359 for (j=0; j<ncols; j++) { 2360 for (kcol=0; kcol<bs; kcol++) { 2361 for (krow=0; krow<bs; krow++) { 2362 atmp = PetscAbsScalar(*aa);aa++; 2363 row = brow + krow; /* row index */ 2364 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2365 } 2366 } 2367 aj++; 2368 } 2369 } 2370 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatCopy_SeqBAIJ" 2376 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2377 { 2378 PetscErrorCode ierr; 2379 2380 PetscFunctionBegin; 2381 /* If the two matrices have the same copy implementation, use fast copy. */ 2382 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2384 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2385 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2386 2387 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2388 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2389 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2390 } else { 2391 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2392 } 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2398 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2399 { 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2409 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2410 { 2411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2412 2413 PetscFunctionBegin; 2414 *array = a->a; 2415 PetscFunctionReturn(0); 2416 } 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2420 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2421 { 2422 PetscFunctionBegin; 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2428 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2429 { 2430 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2431 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2432 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 /* Set the number of nonzeros in the new matrix */ 2437 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 #undef __FUNCT__ 2442 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2443 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2444 { 2445 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2446 PetscErrorCode ierr; 2447 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2448 PetscBLASInt one=1; 2449 2450 PetscFunctionBegin; 2451 if (str == SAME_NONZERO_PATTERN) { 2452 PetscScalar alpha = a; 2453 PetscBLASInt bnz; 2454 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2455 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2456 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2457 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2458 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2459 } else { 2460 Mat B; 2461 PetscInt *nnz; 2462 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2463 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2464 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2465 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2466 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2467 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2468 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2469 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2470 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2471 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2472 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2473 ierr = PetscFree(nnz);CHKERRQ(ierr); 2474 } 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2480 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2484 MatScalar *aa = a->a; 2485 2486 PetscFunctionBegin; 2487 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 #undef __FUNCT__ 2492 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2493 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2494 { 2495 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2496 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2497 MatScalar *aa = a->a; 2498 2499 PetscFunctionBegin; 2500 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2506 /* 2507 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2508 */ 2509 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2510 { 2511 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2512 PetscErrorCode ierr; 2513 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2514 PetscInt nz = a->i[m],row,*jj,mr,col; 2515 2516 PetscFunctionBegin; 2517 *nn = n; 2518 if (!ia) PetscFunctionReturn(0); 2519 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2520 else { 2521 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2522 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2523 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2524 jj = a->j; 2525 for (i=0; i<nz; i++) { 2526 collengths[jj[i]]++; 2527 } 2528 cia[0] = oshift; 2529 for (i=0; i<n; i++) { 2530 cia[i+1] = cia[i] + collengths[i]; 2531 } 2532 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2533 jj = a->j; 2534 for (row=0; row<m; row++) { 2535 mr = a->i[row+1] - a->i[row]; 2536 for (i=0; i<mr; i++) { 2537 col = *jj++; 2538 2539 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2540 } 2541 } 2542 ierr = PetscFree(collengths);CHKERRQ(ierr); 2543 *ia = cia; *ja = cja; 2544 } 2545 PetscFunctionReturn(0); 2546 } 2547 2548 #undef __FUNCT__ 2549 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2550 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2551 { 2552 PetscErrorCode ierr; 2553 2554 PetscFunctionBegin; 2555 if (!ia) PetscFunctionReturn(0); 2556 ierr = PetscFree(*ia);CHKERRQ(ierr); 2557 ierr = PetscFree(*ja);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 /* 2562 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2563 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2564 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2565 */ 2566 #undef __FUNCT__ 2567 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2568 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2569 { 2570 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2571 PetscErrorCode ierr; 2572 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2573 PetscInt nz = a->i[m],row,*jj,mr,col; 2574 PetscInt *cspidx; 2575 2576 PetscFunctionBegin; 2577 *nn = n; 2578 if (!ia) PetscFunctionReturn(0); 2579 2580 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2581 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2582 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2583 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2584 jj = a->j; 2585 for (i=0; i<nz; i++) { 2586 collengths[jj[i]]++; 2587 } 2588 cia[0] = oshift; 2589 for (i=0; i<n; i++) { 2590 cia[i+1] = cia[i] + collengths[i]; 2591 } 2592 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2593 jj = a->j; 2594 for (row=0; row<m; row++) { 2595 mr = a->i[row+1] - a->i[row]; 2596 for (i=0; i<mr; i++) { 2597 col = *jj++; 2598 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2599 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2600 } 2601 } 2602 ierr = PetscFree(collengths);CHKERRQ(ierr); 2603 *ia = cia; *ja = cja; 2604 *spidx = cspidx; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2610 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2611 { 2612 PetscErrorCode ierr; 2613 2614 PetscFunctionBegin; 2615 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2616 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 #undef __FUNCT__ 2621 #define __FUNCT__ "MatShift_SeqBAIJ" 2622 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2623 { 2624 PetscErrorCode ierr; 2625 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2626 2627 PetscFunctionBegin; 2628 if (!Y->preallocated || !aij->nz) { 2629 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2630 } 2631 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2632 PetscFunctionReturn(0); 2633 } 2634 2635 /* -------------------------------------------------------------------*/ 2636 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2637 MatGetRow_SeqBAIJ, 2638 MatRestoreRow_SeqBAIJ, 2639 MatMult_SeqBAIJ_N, 2640 /* 4*/ MatMultAdd_SeqBAIJ_N, 2641 MatMultTranspose_SeqBAIJ, 2642 MatMultTransposeAdd_SeqBAIJ, 2643 0, 2644 0, 2645 0, 2646 /* 10*/ 0, 2647 MatLUFactor_SeqBAIJ, 2648 0, 2649 0, 2650 MatTranspose_SeqBAIJ, 2651 /* 15*/ MatGetInfo_SeqBAIJ, 2652 MatEqual_SeqBAIJ, 2653 MatGetDiagonal_SeqBAIJ, 2654 MatDiagonalScale_SeqBAIJ, 2655 MatNorm_SeqBAIJ, 2656 /* 20*/ 0, 2657 MatAssemblyEnd_SeqBAIJ, 2658 MatSetOption_SeqBAIJ, 2659 MatZeroEntries_SeqBAIJ, 2660 /* 24*/ MatZeroRows_SeqBAIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /* 29*/ MatSetUp_SeqBAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /* 34*/ MatDuplicate_SeqBAIJ, 2671 0, 2672 0, 2673 MatILUFactor_SeqBAIJ, 2674 0, 2675 /* 39*/ MatAXPY_SeqBAIJ, 2676 MatGetSubMatrices_SeqBAIJ, 2677 MatIncreaseOverlap_SeqBAIJ, 2678 MatGetValues_SeqBAIJ, 2679 MatCopy_SeqBAIJ, 2680 /* 44*/ 0, 2681 MatScale_SeqBAIJ, 2682 MatShift_SeqBAIJ, 2683 0, 2684 MatZeroRowsColumns_SeqBAIJ, 2685 /* 49*/ 0, 2686 MatGetRowIJ_SeqBAIJ, 2687 MatRestoreRowIJ_SeqBAIJ, 2688 MatGetColumnIJ_SeqBAIJ, 2689 MatRestoreColumnIJ_SeqBAIJ, 2690 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2691 0, 2692 0, 2693 0, 2694 MatSetValuesBlocked_SeqBAIJ, 2695 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2696 MatDestroy_SeqBAIJ, 2697 MatView_SeqBAIJ, 2698 0, 2699 0, 2700 /* 64*/ 0, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2706 0, 2707 MatConvert_Basic, 2708 0, 2709 0, 2710 /* 74*/ 0, 2711 MatFDColoringApply_BAIJ, 2712 0, 2713 0, 2714 0, 2715 /* 79*/ 0, 2716 0, 2717 0, 2718 0, 2719 MatLoad_SeqBAIJ, 2720 /* 84*/ 0, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /* 89*/ 0, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /* 94*/ 0, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /* 99*/ 0, 2736 0, 2737 0, 2738 0, 2739 0, 2740 /*104*/ 0, 2741 MatRealPart_SeqBAIJ, 2742 MatImaginaryPart_SeqBAIJ, 2743 0, 2744 0, 2745 /*109*/ 0, 2746 0, 2747 0, 2748 0, 2749 MatMissingDiagonal_SeqBAIJ, 2750 /*114*/ 0, 2751 0, 2752 0, 2753 0, 2754 0, 2755 /*119*/ 0, 2756 0, 2757 MatMultHermitianTranspose_SeqBAIJ, 2758 MatMultHermitianTransposeAdd_SeqBAIJ, 2759 0, 2760 /*124*/ 0, 2761 0, 2762 MatInvertBlockDiagonal_SeqBAIJ, 2763 0, 2764 0, 2765 /*129*/ 0, 2766 0, 2767 0, 2768 0, 2769 0, 2770 /*134*/ 0, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*139*/ 0, 2776 0, 2777 0, 2778 MatFDColoringSetUp_SeqXAIJ, 2779 0, 2780 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2781 }; 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2785 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2786 { 2787 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2788 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2793 2794 /* allocate space for values if not already there */ 2795 if (!aij->saved_values) { 2796 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2797 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2798 } 2799 2800 /* copy values over */ 2801 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2802 PetscFunctionReturn(0); 2803 } 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2807 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2808 { 2809 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2810 PetscErrorCode ierr; 2811 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2812 2813 PetscFunctionBegin; 2814 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2815 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2816 2817 /* copy values over */ 2818 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2819 PetscFunctionReturn(0); 2820 } 2821 2822 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2823 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2824 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2827 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2828 { 2829 Mat_SeqBAIJ *b; 2830 PetscErrorCode ierr; 2831 PetscInt i,mbs,nbs,bs2; 2832 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2833 2834 PetscFunctionBegin; 2835 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2836 if (nz == MAT_SKIP_ALLOCATION) { 2837 skipallocation = PETSC_TRUE; 2838 nz = 0; 2839 } 2840 2841 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2842 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2843 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2844 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2845 2846 B->preallocated = PETSC_TRUE; 2847 2848 mbs = B->rmap->n/bs; 2849 nbs = B->cmap->n/bs; 2850 bs2 = bs*bs; 2851 2852 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2853 2854 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2855 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2856 if (nnz) { 2857 for (i=0; i<mbs; i++) { 2858 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2859 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2860 } 2861 } 2862 2863 b = (Mat_SeqBAIJ*)B->data; 2864 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2865 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2866 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2867 2868 if (!flg) { 2869 switch (bs) { 2870 case 1: 2871 B->ops->mult = MatMult_SeqBAIJ_1; 2872 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2873 break; 2874 case 2: 2875 B->ops->mult = MatMult_SeqBAIJ_2; 2876 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2877 break; 2878 case 3: 2879 B->ops->mult = MatMult_SeqBAIJ_3; 2880 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2881 break; 2882 case 4: 2883 B->ops->mult = MatMult_SeqBAIJ_4; 2884 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2885 break; 2886 case 5: 2887 B->ops->mult = MatMult_SeqBAIJ_5; 2888 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2889 break; 2890 case 6: 2891 B->ops->mult = MatMult_SeqBAIJ_6; 2892 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2893 break; 2894 case 7: 2895 B->ops->mult = MatMult_SeqBAIJ_7; 2896 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2897 break; 2898 case 15: 2899 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2900 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2901 break; 2902 default: 2903 B->ops->mult = MatMult_SeqBAIJ_N; 2904 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2905 break; 2906 } 2907 } 2908 B->ops->sor = MatSOR_SeqBAIJ; 2909 b->mbs = mbs; 2910 b->nbs = nbs; 2911 if (!skipallocation) { 2912 if (!b->imax) { 2913 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2914 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2915 2916 b->free_imax_ilen = PETSC_TRUE; 2917 } 2918 /* b->ilen will count nonzeros in each block row so far. */ 2919 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2920 if (!nnz) { 2921 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2922 else if (nz < 0) nz = 1; 2923 for (i=0; i<mbs; i++) b->imax[i] = nz; 2924 nz = nz*mbs; 2925 } else { 2926 nz = 0; 2927 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2928 } 2929 2930 /* allocate the matrix space */ 2931 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2932 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2933 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2934 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2935 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2936 2937 b->singlemalloc = PETSC_TRUE; 2938 b->i[0] = 0; 2939 for (i=1; i<mbs+1; i++) { 2940 b->i[i] = b->i[i-1] + b->imax[i-1]; 2941 } 2942 b->free_a = PETSC_TRUE; 2943 b->free_ij = PETSC_TRUE; 2944 } else { 2945 b->free_a = PETSC_FALSE; 2946 b->free_ij = PETSC_FALSE; 2947 } 2948 2949 b->bs2 = bs2; 2950 b->mbs = mbs; 2951 b->nz = 0; 2952 b->maxnz = nz; 2953 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2954 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2960 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2961 { 2962 PetscInt i,m,nz,nz_max=0,*nnz; 2963 PetscScalar *values=0; 2964 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2965 PetscErrorCode ierr; 2966 2967 PetscFunctionBegin; 2968 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2969 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2970 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2971 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2972 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2973 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2974 m = B->rmap->n/bs; 2975 2976 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2977 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2978 for (i=0; i<m; i++) { 2979 nz = ii[i+1]- ii[i]; 2980 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2981 nz_max = PetscMax(nz_max, nz); 2982 nnz[i] = nz; 2983 } 2984 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2985 ierr = PetscFree(nnz);CHKERRQ(ierr); 2986 2987 values = (PetscScalar*)V; 2988 if (!values) { 2989 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2990 } 2991 for (i=0; i<m; i++) { 2992 PetscInt ncols = ii[i+1] - ii[i]; 2993 const PetscInt *icols = jj + ii[i]; 2994 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2995 if (!roworiented) { 2996 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2997 } else { 2998 PetscInt j; 2999 for (j=0; j<ncols; j++) { 3000 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3001 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3002 } 3003 } 3004 } 3005 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3006 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3007 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3008 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 /*MC 3013 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3014 block sparse compressed row format. 3015 3016 Options Database Keys: 3017 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3018 3019 Level: beginner 3020 3021 .seealso: MatCreateSeqBAIJ() 3022 M*/ 3023 3024 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3025 3026 #undef __FUNCT__ 3027 #define __FUNCT__ "MatCreate_SeqBAIJ" 3028 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3029 { 3030 PetscErrorCode ierr; 3031 PetscMPIInt size; 3032 Mat_SeqBAIJ *b; 3033 3034 PetscFunctionBegin; 3035 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3036 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3037 3038 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3039 B->data = (void*)b; 3040 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3041 3042 b->row = 0; 3043 b->col = 0; 3044 b->icol = 0; 3045 b->reallocs = 0; 3046 b->saved_values = 0; 3047 3048 b->roworiented = PETSC_TRUE; 3049 b->nonew = 0; 3050 b->diag = 0; 3051 B->spptr = 0; 3052 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3053 b->keepnonzeropattern = PETSC_FALSE; 3054 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3071 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3072 { 3073 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3074 PetscErrorCode ierr; 3075 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3076 3077 PetscFunctionBegin; 3078 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3079 3080 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3081 c->imax = a->imax; 3082 c->ilen = a->ilen; 3083 c->free_imax_ilen = PETSC_FALSE; 3084 } else { 3085 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3086 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3087 for (i=0; i<mbs; i++) { 3088 c->imax[i] = a->imax[i]; 3089 c->ilen[i] = a->ilen[i]; 3090 } 3091 c->free_imax_ilen = PETSC_TRUE; 3092 } 3093 3094 /* allocate the matrix space */ 3095 if (mallocmatspace) { 3096 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3097 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3098 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3099 3100 c->i = a->i; 3101 c->j = a->j; 3102 c->singlemalloc = PETSC_FALSE; 3103 c->free_a = PETSC_TRUE; 3104 c->free_ij = PETSC_FALSE; 3105 c->parent = A; 3106 C->preallocated = PETSC_TRUE; 3107 C->assembled = PETSC_TRUE; 3108 3109 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3110 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3111 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3112 } else { 3113 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3114 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3115 3116 c->singlemalloc = PETSC_TRUE; 3117 c->free_a = PETSC_TRUE; 3118 c->free_ij = PETSC_TRUE; 3119 3120 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3121 if (mbs > 0) { 3122 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3123 if (cpvalues == MAT_COPY_VALUES) { 3124 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3125 } else { 3126 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3127 } 3128 } 3129 C->preallocated = PETSC_TRUE; 3130 C->assembled = PETSC_TRUE; 3131 } 3132 } 3133 3134 c->roworiented = a->roworiented; 3135 c->nonew = a->nonew; 3136 3137 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3138 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3139 3140 c->bs2 = a->bs2; 3141 c->mbs = a->mbs; 3142 c->nbs = a->nbs; 3143 3144 if (a->diag) { 3145 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3146 c->diag = a->diag; 3147 c->free_diag = PETSC_FALSE; 3148 } else { 3149 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3150 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3151 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3152 c->free_diag = PETSC_TRUE; 3153 } 3154 } else c->diag = 0; 3155 3156 c->nz = a->nz; 3157 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3158 c->solve_work = NULL; 3159 c->mult_work = NULL; 3160 c->sor_workt = NULL; 3161 c->sor_work = NULL; 3162 3163 c->compressedrow.use = a->compressedrow.use; 3164 c->compressedrow.nrows = a->compressedrow.nrows; 3165 if (a->compressedrow.use) { 3166 i = a->compressedrow.nrows; 3167 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3168 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3169 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3170 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3171 } else { 3172 c->compressedrow.use = PETSC_FALSE; 3173 c->compressedrow.i = NULL; 3174 c->compressedrow.rindex = NULL; 3175 } 3176 C->nonzerostate = A->nonzerostate; 3177 3178 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3179 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3180 PetscFunctionReturn(0); 3181 } 3182 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3185 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3186 { 3187 PetscErrorCode ierr; 3188 3189 PetscFunctionBegin; 3190 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3191 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3192 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3193 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3194 PetscFunctionReturn(0); 3195 } 3196 3197 #undef __FUNCT__ 3198 #define __FUNCT__ "MatLoad_SeqBAIJ" 3199 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3200 { 3201 Mat_SeqBAIJ *a; 3202 PetscErrorCode ierr; 3203 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3204 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3205 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3206 PetscInt *masked,nmask,tmp,bs2,ishift; 3207 PetscMPIInt size; 3208 int fd; 3209 PetscScalar *aa; 3210 MPI_Comm comm; 3211 3212 PetscFunctionBegin; 3213 /* force binary viewer to load .info file if it has not yet done so */ 3214 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3215 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3216 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3217 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3218 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3219 if (bs < 0) bs = 1; 3220 bs2 = bs*bs; 3221 3222 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3223 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3224 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3225 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3226 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3227 M = header[1]; N = header[2]; nz = header[3]; 3228 3229 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3230 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3231 3232 /* 3233 This code adds extra rows to make sure the number of rows is 3234 divisible by the blocksize 3235 */ 3236 mbs = M/bs; 3237 extra_rows = bs - M + bs*(mbs); 3238 if (extra_rows == bs) extra_rows = 0; 3239 else mbs++; 3240 if (extra_rows) { 3241 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3242 } 3243 3244 /* Set global sizes if not already set */ 3245 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3246 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3247 } else { /* Check if the matrix global sizes are correct */ 3248 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3249 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3250 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3251 } 3252 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3253 } 3254 3255 /* read in row lengths */ 3256 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3257 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3258 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3259 3260 /* read in column indices */ 3261 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3262 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3263 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3264 3265 /* loop over row lengths determining block row lengths */ 3266 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3267 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3268 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3269 rowcount = 0; 3270 nzcount = 0; 3271 for (i=0; i<mbs; i++) { 3272 nmask = 0; 3273 for (j=0; j<bs; j++) { 3274 kmax = rowlengths[rowcount]; 3275 for (k=0; k<kmax; k++) { 3276 tmp = jj[nzcount++]/bs; 3277 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3278 } 3279 rowcount++; 3280 } 3281 browlengths[i] += nmask; 3282 /* zero out the mask elements we set */ 3283 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3284 } 3285 3286 /* Do preallocation */ 3287 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3288 a = (Mat_SeqBAIJ*)newmat->data; 3289 3290 /* set matrix "i" values */ 3291 a->i[0] = 0; 3292 for (i=1; i<= mbs; i++) { 3293 a->i[i] = a->i[i-1] + browlengths[i-1]; 3294 a->ilen[i-1] = browlengths[i-1]; 3295 } 3296 a->nz = 0; 3297 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3298 3299 /* read in nonzero values */ 3300 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3301 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3302 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3303 3304 /* set "a" and "j" values into matrix */ 3305 nzcount = 0; jcount = 0; 3306 for (i=0; i<mbs; i++) { 3307 nzcountb = nzcount; 3308 nmask = 0; 3309 for (j=0; j<bs; j++) { 3310 kmax = rowlengths[i*bs+j]; 3311 for (k=0; k<kmax; k++) { 3312 tmp = jj[nzcount++]/bs; 3313 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3314 } 3315 } 3316 /* sort the masked values */ 3317 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3318 3319 /* set "j" values into matrix */ 3320 maskcount = 1; 3321 for (j=0; j<nmask; j++) { 3322 a->j[jcount++] = masked[j]; 3323 mask[masked[j]] = maskcount++; 3324 } 3325 /* set "a" values into matrix */ 3326 ishift = bs2*a->i[i]; 3327 for (j=0; j<bs; j++) { 3328 kmax = rowlengths[i*bs+j]; 3329 for (k=0; k<kmax; k++) { 3330 tmp = jj[nzcountb]/bs; 3331 block = mask[tmp] - 1; 3332 point = jj[nzcountb] - bs*tmp; 3333 idx = ishift + bs2*block + j + bs*point; 3334 a->a[idx] = (MatScalar)aa[nzcountb++]; 3335 } 3336 } 3337 /* zero out the mask elements we set */ 3338 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3339 } 3340 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3341 3342 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3343 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3344 ierr = PetscFree(aa);CHKERRQ(ierr); 3345 ierr = PetscFree(jj);CHKERRQ(ierr); 3346 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3347 3348 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3349 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatCreateSeqBAIJ" 3355 /*@C 3356 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3357 compressed row) format. For good matrix assembly performance the 3358 user should preallocate the matrix storage by setting the parameter nz 3359 (or the array nnz). By setting these parameters accurately, performance 3360 during matrix assembly can be increased by more than a factor of 50. 3361 3362 Collective on MPI_Comm 3363 3364 Input Parameters: 3365 + comm - MPI communicator, set to PETSC_COMM_SELF 3366 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3367 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3368 . m - number of rows 3369 . n - number of columns 3370 . nz - number of nonzero blocks per block row (same for all rows) 3371 - nnz - array containing the number of nonzero blocks in the various block rows 3372 (possibly different for each block row) or NULL 3373 3374 Output Parameter: 3375 . A - the matrix 3376 3377 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3378 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3379 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3380 3381 Options Database Keys: 3382 . -mat_no_unroll - uses code that does not unroll the loops in the 3383 block calculations (much slower) 3384 . -mat_block_size - size of the blocks to use 3385 3386 Level: intermediate 3387 3388 Notes: 3389 The number of rows and columns must be divisible by blocksize. 3390 3391 If the nnz parameter is given then the nz parameter is ignored 3392 3393 A nonzero block is any block that as 1 or more nonzeros in it 3394 3395 The block AIJ format is fully compatible with standard Fortran 77 3396 storage. That is, the stored row and column indices can begin at 3397 either one (as in Fortran) or zero. See the users' manual for details. 3398 3399 Specify the preallocated storage with either nz or nnz (not both). 3400 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3401 allocation. See Users-Manual: ch_mat for details. 3402 matrices. 3403 3404 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3405 @*/ 3406 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3407 { 3408 PetscErrorCode ierr; 3409 3410 PetscFunctionBegin; 3411 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3412 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3413 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3414 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3415 PetscFunctionReturn(0); 3416 } 3417 3418 #undef __FUNCT__ 3419 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3420 /*@C 3421 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3422 per row in the matrix. For good matrix assembly performance the 3423 user should preallocate the matrix storage by setting the parameter nz 3424 (or the array nnz). By setting these parameters accurately, performance 3425 during matrix assembly can be increased by more than a factor of 50. 3426 3427 Collective on MPI_Comm 3428 3429 Input Parameters: 3430 + B - the matrix 3431 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3432 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3433 . nz - number of block nonzeros per block row (same for all rows) 3434 - nnz - array containing the number of block nonzeros in the various block rows 3435 (possibly different for each block row) or NULL 3436 3437 Options Database Keys: 3438 . -mat_no_unroll - uses code that does not unroll the loops in the 3439 block calculations (much slower) 3440 . -mat_block_size - size of the blocks to use 3441 3442 Level: intermediate 3443 3444 Notes: 3445 If the nnz parameter is given then the nz parameter is ignored 3446 3447 You can call MatGetInfo() to get information on how effective the preallocation was; 3448 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3449 You can also run with the option -info and look for messages with the string 3450 malloc in them to see if additional memory allocation was needed. 3451 3452 The block AIJ format is fully compatible with standard Fortran 77 3453 storage. That is, the stored row and column indices can begin at 3454 either one (as in Fortran) or zero. See the users' manual for details. 3455 3456 Specify the preallocated storage with either nz or nnz (not both). 3457 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3458 allocation. See Users-Manual: ch_mat for details. 3459 3460 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3461 @*/ 3462 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3463 { 3464 PetscErrorCode ierr; 3465 3466 PetscFunctionBegin; 3467 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3468 PetscValidType(B,1); 3469 PetscValidLogicalCollectiveInt(B,bs,2); 3470 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3471 PetscFunctionReturn(0); 3472 } 3473 3474 #undef __FUNCT__ 3475 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3476 /*@C 3477 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3478 (the default sequential PETSc format). 3479 3480 Collective on MPI_Comm 3481 3482 Input Parameters: 3483 + B - the matrix 3484 . i - the indices into j for the start of each local row (starts with zero) 3485 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3486 - v - optional values in the matrix 3487 3488 Level: developer 3489 3490 Notes: 3491 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3492 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3493 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3494 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3495 block column and the second index is over columns within a block. 3496 3497 .keywords: matrix, aij, compressed row, sparse 3498 3499 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3500 @*/ 3501 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3502 { 3503 PetscErrorCode ierr; 3504 3505 PetscFunctionBegin; 3506 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3507 PetscValidType(B,1); 3508 PetscValidLogicalCollectiveInt(B,bs,2); 3509 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3510 PetscFunctionReturn(0); 3511 } 3512 3513 3514 #undef __FUNCT__ 3515 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3516 /*@ 3517 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3518 3519 Collective on MPI_Comm 3520 3521 Input Parameters: 3522 + comm - must be an MPI communicator of size 1 3523 . bs - size of block 3524 . m - number of rows 3525 . n - number of columns 3526 . i - row indices 3527 . j - column indices 3528 - a - matrix values 3529 3530 Output Parameter: 3531 . mat - the matrix 3532 3533 Level: advanced 3534 3535 Notes: 3536 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3537 once the matrix is destroyed 3538 3539 You cannot set new nonzero locations into this matrix, that will generate an error. 3540 3541 The i and j indices are 0 based 3542 3543 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3544 3545 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3546 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3547 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3548 with column-major ordering within blocks. 3549 3550 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3551 3552 @*/ 3553 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3554 { 3555 PetscErrorCode ierr; 3556 PetscInt ii; 3557 Mat_SeqBAIJ *baij; 3558 3559 PetscFunctionBegin; 3560 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3561 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3562 3563 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3564 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3565 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3566 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3567 baij = (Mat_SeqBAIJ*)(*mat)->data; 3568 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3569 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3570 3571 baij->i = i; 3572 baij->j = j; 3573 baij->a = a; 3574 3575 baij->singlemalloc = PETSC_FALSE; 3576 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3577 baij->free_a = PETSC_FALSE; 3578 baij->free_ij = PETSC_FALSE; 3579 3580 for (ii=0; ii<m; ii++) { 3581 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3582 #if defined(PETSC_USE_DEBUG) 3583 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3584 #endif 3585 } 3586 #if defined(PETSC_USE_DEBUG) 3587 for (ii=0; ii<baij->i[m]; ii++) { 3588 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3589 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3590 } 3591 #endif 3592 3593 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3594 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3595 PetscFunctionReturn(0); 3596 } 3597 3598 #undef __FUNCT__ 3599 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3600 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3601 { 3602 PetscErrorCode ierr; 3603 3604 PetscFunctionBegin; 3605 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3606 PetscFunctionReturn(0); 3607 } 3608