1 /* 2 Defines the basic matrix operations for the MAIJ matrix storage format. 3 This format is used for restriction and interpolation operations for 4 multicomponent problems. It interpolates each component the same way 5 independently. 6 7 We provide: 8 MatMult() 9 MatMultTranspose() 10 MatMultTransposeAdd() 11 MatMultAdd() 12 and 13 MatCreateMAIJ(Mat,dof,Mat*) 14 15 This single directory handles both the sequential and parallel codes 16 */ 17 18 #include "src/mat/impls/maij/maij.h" 19 #include "src/mat/utils/freespace.h" 20 #include "vecimpl.h" 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatMAIJGetAIJ" 24 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 25 { 26 PetscErrorCode ierr; 27 PetscTruth ismpimaij,isseqmaij; 28 29 PetscFunctionBegin; 30 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32 if (ismpimaij) { 33 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34 35 *B = b->A; 36 } else if (isseqmaij) { 37 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38 39 *B = b->AIJ; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatMAIJRedimension" 48 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 49 { 50 PetscErrorCode ierr; 51 Mat Aij; 52 53 PetscFunctionBegin; 54 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatDestroy_SeqMAIJ" 61 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 62 { 63 PetscErrorCode ierr; 64 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 65 66 PetscFunctionBegin; 67 if (b->AIJ) { 68 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 69 } 70 ierr = PetscFree(b);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatView_SeqMAIJ" 76 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 77 { 78 PetscErrorCode ierr; 79 Mat B; 80 81 PetscFunctionBegin; 82 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 83 ierr = MatView(B,viewer);CHKERRQ(ierr); 84 ierr = MatDestroy(B);CHKERRQ(ierr); 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatView_MPIMAIJ" 90 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 91 { 92 PetscErrorCode ierr; 93 Mat B; 94 95 PetscFunctionBegin; 96 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 97 ierr = MatView(B,viewer);CHKERRQ(ierr); 98 ierr = MatDestroy(B);CHKERRQ(ierr); 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatDestroy_MPIMAIJ" 104 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 105 { 106 PetscErrorCode ierr; 107 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 108 109 PetscFunctionBegin; 110 if (b->AIJ) { 111 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 112 } 113 if (b->OAIJ) { 114 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 115 } 116 if (b->A) { 117 ierr = MatDestroy(b->A);CHKERRQ(ierr); 118 } 119 if (b->ctx) { 120 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 121 } 122 if (b->w) { 123 ierr = VecDestroy(b->w);CHKERRQ(ierr); 124 } 125 ierr = PetscFree(b);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 /*MC 130 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 131 multicomponent problems, interpolating or restricting each component the same way independently. 132 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 133 134 Operations provided: 135 . MatMult 136 . MatMultTranspose 137 . MatMultAdd 138 . MatMultTransposeAdd 139 140 Level: advanced 141 142 .seealso: MatCreateSeqDense 143 M*/ 144 145 EXTERN_C_BEGIN 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatCreate_MAIJ" 148 PetscErrorCode MatCreate_MAIJ(Mat A) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIMAIJ *b; 152 153 PetscFunctionBegin; 154 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 155 A->data = (void*)b; 156 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 157 A->factor = 0; 158 A->mapping = 0; 159 160 b->AIJ = 0; 161 b->dof = 0; 162 b->OAIJ = 0; 163 b->ctx = 0; 164 b->w = 0; 165 PetscFunctionReturn(0); 166 } 167 EXTERN_C_END 168 169 /* --------------------------------------------------------------------------------------*/ 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatMult_SeqMAIJ_2" 172 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 173 { 174 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 176 PetscScalar *x,*y,*v,sum1, sum2; 177 PetscErrorCode ierr; 178 PetscInt m = b->AIJ->m,*idx,*ii; 179 PetscInt n,i,jrow,j; 180 181 PetscFunctionBegin; 182 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 183 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 184 idx = a->j; 185 v = a->a; 186 ii = a->i; 187 188 for (i=0; i<m; i++) { 189 jrow = ii[i]; 190 n = ii[i+1] - jrow; 191 sum1 = 0.0; 192 sum2 = 0.0; 193 for (j=0; j<n; j++) { 194 sum1 += v[jrow]*x[2*idx[jrow]]; 195 sum2 += v[jrow]*x[2*idx[jrow]+1]; 196 jrow++; 197 } 198 y[2*i] = sum1; 199 y[2*i+1] = sum2; 200 } 201 202 PetscLogFlops(4*a->nz - 2*m); 203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 204 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 210 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 211 { 212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 214 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 215 PetscErrorCode ierr; 216 PetscInt m = b->AIJ->m,n,i,*idx; 217 218 PetscFunctionBegin; 219 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 220 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 221 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 222 223 for (i=0; i<m; i++) { 224 idx = a->j + a->i[i] ; 225 v = a->a + a->i[i] ; 226 n = a->i[i+1] - a->i[i]; 227 alpha1 = x[2*i]; 228 alpha2 = x[2*i+1]; 229 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 230 } 231 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 232 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 239 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 240 { 241 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 243 PetscScalar *x,*y,*v,sum1, sum2; 244 PetscErrorCode ierr; 245 PetscInt m = b->AIJ->m,*idx,*ii; 246 PetscInt n,i,jrow,j; 247 248 PetscFunctionBegin; 249 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 250 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 251 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 252 idx = a->j; 253 v = a->a; 254 ii = a->i; 255 256 for (i=0; i<m; i++) { 257 jrow = ii[i]; 258 n = ii[i+1] - jrow; 259 sum1 = 0.0; 260 sum2 = 0.0; 261 for (j=0; j<n; j++) { 262 sum1 += v[jrow]*x[2*idx[jrow]]; 263 sum2 += v[jrow]*x[2*idx[jrow]+1]; 264 jrow++; 265 } 266 y[2*i] += sum1; 267 y[2*i+1] += sum2; 268 } 269 270 PetscLogFlops(4*a->nz - 2*m); 271 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 272 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 273 PetscFunctionReturn(0); 274 } 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 277 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 278 { 279 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 281 PetscScalar *x,*y,*v,alpha1,alpha2; 282 PetscErrorCode ierr; 283 PetscInt m = b->AIJ->m,n,i,*idx; 284 285 PetscFunctionBegin; 286 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 287 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 288 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 289 290 for (i=0; i<m; i++) { 291 idx = a->j + a->i[i] ; 292 v = a->a + a->i[i] ; 293 n = a->i[i+1] - a->i[i]; 294 alpha1 = x[2*i]; 295 alpha2 = x[2*i+1]; 296 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 297 } 298 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 299 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 300 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 /* --------------------------------------------------------------------------------------*/ 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatMult_SeqMAIJ_3" 306 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 307 { 308 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 309 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 310 PetscScalar *x,*y,*v,sum1, sum2, sum3; 311 PetscErrorCode ierr; 312 PetscInt m = b->AIJ->m,*idx,*ii; 313 PetscInt n,i,jrow,j; 314 315 PetscFunctionBegin; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 318 idx = a->j; 319 v = a->a; 320 ii = a->i; 321 322 for (i=0; i<m; i++) { 323 jrow = ii[i]; 324 n = ii[i+1] - jrow; 325 sum1 = 0.0; 326 sum2 = 0.0; 327 sum3 = 0.0; 328 for (j=0; j<n; j++) { 329 sum1 += v[jrow]*x[3*idx[jrow]]; 330 sum2 += v[jrow]*x[3*idx[jrow]+1]; 331 sum3 += v[jrow]*x[3*idx[jrow]+2]; 332 jrow++; 333 } 334 y[3*i] = sum1; 335 y[3*i+1] = sum2; 336 y[3*i+2] = sum3; 337 } 338 339 PetscLogFlops(6*a->nz - 3*m); 340 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 341 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 347 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 348 { 349 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 350 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 351 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 352 PetscErrorCode ierr; 353 PetscInt m = b->AIJ->m,n,i,*idx; 354 355 PetscFunctionBegin; 356 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 358 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 359 360 for (i=0; i<m; i++) { 361 idx = a->j + a->i[i]; 362 v = a->a + a->i[i]; 363 n = a->i[i+1] - a->i[i]; 364 alpha1 = x[3*i]; 365 alpha2 = x[3*i+1]; 366 alpha3 = x[3*i+2]; 367 while (n-->0) { 368 y[3*(*idx)] += alpha1*(*v); 369 y[3*(*idx)+1] += alpha2*(*v); 370 y[3*(*idx)+2] += alpha3*(*v); 371 idx++; v++; 372 } 373 } 374 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 375 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 376 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 382 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 383 { 384 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 386 PetscScalar *x,*y,*v,sum1, sum2, sum3; 387 PetscErrorCode ierr; 388 PetscInt m = b->AIJ->m,*idx,*ii; 389 PetscInt n,i,jrow,j; 390 391 PetscFunctionBegin; 392 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 393 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 394 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 395 idx = a->j; 396 v = a->a; 397 ii = a->i; 398 399 for (i=0; i<m; i++) { 400 jrow = ii[i]; 401 n = ii[i+1] - jrow; 402 sum1 = 0.0; 403 sum2 = 0.0; 404 sum3 = 0.0; 405 for (j=0; j<n; j++) { 406 sum1 += v[jrow]*x[3*idx[jrow]]; 407 sum2 += v[jrow]*x[3*idx[jrow]+1]; 408 sum3 += v[jrow]*x[3*idx[jrow]+2]; 409 jrow++; 410 } 411 y[3*i] += sum1; 412 y[3*i+1] += sum2; 413 y[3*i+2] += sum3; 414 } 415 416 PetscLogFlops(6*a->nz); 417 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 418 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 423 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 424 { 425 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 426 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 427 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 428 PetscErrorCode ierr; 429 PetscInt m = b->AIJ->m,n,i,*idx; 430 431 PetscFunctionBegin; 432 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 433 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 434 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 435 for (i=0; i<m; i++) { 436 idx = a->j + a->i[i] ; 437 v = a->a + a->i[i] ; 438 n = a->i[i+1] - a->i[i]; 439 alpha1 = x[3*i]; 440 alpha2 = x[3*i+1]; 441 alpha3 = x[3*i+2]; 442 while (n-->0) { 443 y[3*(*idx)] += alpha1*(*v); 444 y[3*(*idx)+1] += alpha2*(*v); 445 y[3*(*idx)+2] += alpha3*(*v); 446 idx++; v++; 447 } 448 } 449 PetscLogFlops(6*a->nz); 450 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 451 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 /* ------------------------------------------------------------------------------*/ 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_SeqMAIJ_4" 458 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 459 { 460 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 461 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 462 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 463 PetscErrorCode ierr; 464 PetscInt m = b->AIJ->m,*idx,*ii; 465 PetscInt n,i,jrow,j; 466 467 PetscFunctionBegin; 468 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 469 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 470 idx = a->j; 471 v = a->a; 472 ii = a->i; 473 474 for (i=0; i<m; i++) { 475 jrow = ii[i]; 476 n = ii[i+1] - jrow; 477 sum1 = 0.0; 478 sum2 = 0.0; 479 sum3 = 0.0; 480 sum4 = 0.0; 481 for (j=0; j<n; j++) { 482 sum1 += v[jrow]*x[4*idx[jrow]]; 483 sum2 += v[jrow]*x[4*idx[jrow]+1]; 484 sum3 += v[jrow]*x[4*idx[jrow]+2]; 485 sum4 += v[jrow]*x[4*idx[jrow]+3]; 486 jrow++; 487 } 488 y[4*i] = sum1; 489 y[4*i+1] = sum2; 490 y[4*i+2] = sum3; 491 y[4*i+3] = sum4; 492 } 493 494 PetscLogFlops(8*a->nz - 4*m); 495 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 496 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 502 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 503 { 504 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 506 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 507 PetscErrorCode ierr; 508 PetscInt m = b->AIJ->m,n,i,*idx; 509 510 PetscFunctionBegin; 511 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 512 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 513 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 514 for (i=0; i<m; i++) { 515 idx = a->j + a->i[i] ; 516 v = a->a + a->i[i] ; 517 n = a->i[i+1] - a->i[i]; 518 alpha1 = x[4*i]; 519 alpha2 = x[4*i+1]; 520 alpha3 = x[4*i+2]; 521 alpha4 = x[4*i+3]; 522 while (n-->0) { 523 y[4*(*idx)] += alpha1*(*v); 524 y[4*(*idx)+1] += alpha2*(*v); 525 y[4*(*idx)+2] += alpha3*(*v); 526 y[4*(*idx)+3] += alpha4*(*v); 527 idx++; v++; 528 } 529 } 530 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 531 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 532 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 538 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 539 { 540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 543 PetscErrorCode ierr; 544 PetscInt m = b->AIJ->m,*idx,*ii; 545 PetscInt n,i,jrow,j; 546 547 PetscFunctionBegin; 548 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 549 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 550 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 551 idx = a->j; 552 v = a->a; 553 ii = a->i; 554 555 for (i=0; i<m; i++) { 556 jrow = ii[i]; 557 n = ii[i+1] - jrow; 558 sum1 = 0.0; 559 sum2 = 0.0; 560 sum3 = 0.0; 561 sum4 = 0.0; 562 for (j=0; j<n; j++) { 563 sum1 += v[jrow]*x[4*idx[jrow]]; 564 sum2 += v[jrow]*x[4*idx[jrow]+1]; 565 sum3 += v[jrow]*x[4*idx[jrow]+2]; 566 sum4 += v[jrow]*x[4*idx[jrow]+3]; 567 jrow++; 568 } 569 y[4*i] += sum1; 570 y[4*i+1] += sum2; 571 y[4*i+2] += sum3; 572 y[4*i+3] += sum4; 573 } 574 575 PetscLogFlops(8*a->nz - 4*m); 576 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 582 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 583 { 584 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 585 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 586 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 587 PetscErrorCode ierr; 588 PetscInt m = b->AIJ->m,n,i,*idx; 589 590 PetscFunctionBegin; 591 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 592 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 593 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 594 595 for (i=0; i<m; i++) { 596 idx = a->j + a->i[i] ; 597 v = a->a + a->i[i] ; 598 n = a->i[i+1] - a->i[i]; 599 alpha1 = x[4*i]; 600 alpha2 = x[4*i+1]; 601 alpha3 = x[4*i+2]; 602 alpha4 = x[4*i+3]; 603 while (n-->0) { 604 y[4*(*idx)] += alpha1*(*v); 605 y[4*(*idx)+1] += alpha2*(*v); 606 y[4*(*idx)+2] += alpha3*(*v); 607 y[4*(*idx)+3] += alpha4*(*v); 608 idx++; v++; 609 } 610 } 611 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 612 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 613 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 /* ------------------------------------------------------------------------------*/ 617 618 #undef __FUNCT__ 619 #define __FUNCT__ "MatMult_SeqMAIJ_5" 620 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 621 { 622 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 623 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 624 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 625 PetscErrorCode ierr; 626 PetscInt m = b->AIJ->m,*idx,*ii; 627 PetscInt n,i,jrow,j; 628 629 PetscFunctionBegin; 630 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 631 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 632 idx = a->j; 633 v = a->a; 634 ii = a->i; 635 636 for (i=0; i<m; i++) { 637 jrow = ii[i]; 638 n = ii[i+1] - jrow; 639 sum1 = 0.0; 640 sum2 = 0.0; 641 sum3 = 0.0; 642 sum4 = 0.0; 643 sum5 = 0.0; 644 for (j=0; j<n; j++) { 645 sum1 += v[jrow]*x[5*idx[jrow]]; 646 sum2 += v[jrow]*x[5*idx[jrow]+1]; 647 sum3 += v[jrow]*x[5*idx[jrow]+2]; 648 sum4 += v[jrow]*x[5*idx[jrow]+3]; 649 sum5 += v[jrow]*x[5*idx[jrow]+4]; 650 jrow++; 651 } 652 y[5*i] = sum1; 653 y[5*i+1] = sum2; 654 y[5*i+2] = sum3; 655 y[5*i+3] = sum4; 656 y[5*i+4] = sum5; 657 } 658 659 PetscLogFlops(10*a->nz - 5*m); 660 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 661 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 667 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 668 { 669 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 670 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 671 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 672 PetscErrorCode ierr; 673 PetscInt m = b->AIJ->m,n,i,*idx; 674 675 PetscFunctionBegin; 676 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 677 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 678 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 679 680 for (i=0; i<m; i++) { 681 idx = a->j + a->i[i] ; 682 v = a->a + a->i[i] ; 683 n = a->i[i+1] - a->i[i]; 684 alpha1 = x[5*i]; 685 alpha2 = x[5*i+1]; 686 alpha3 = x[5*i+2]; 687 alpha4 = x[5*i+3]; 688 alpha5 = x[5*i+4]; 689 while (n-->0) { 690 y[5*(*idx)] += alpha1*(*v); 691 y[5*(*idx)+1] += alpha2*(*v); 692 y[5*(*idx)+2] += alpha3*(*v); 693 y[5*(*idx)+3] += alpha4*(*v); 694 y[5*(*idx)+4] += alpha5*(*v); 695 idx++; v++; 696 } 697 } 698 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 700 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 706 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 707 { 708 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 710 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 711 PetscErrorCode ierr; 712 PetscInt m = b->AIJ->m,*idx,*ii; 713 PetscInt n,i,jrow,j; 714 715 PetscFunctionBegin; 716 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 717 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 718 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 719 idx = a->j; 720 v = a->a; 721 ii = a->i; 722 723 for (i=0; i<m; i++) { 724 jrow = ii[i]; 725 n = ii[i+1] - jrow; 726 sum1 = 0.0; 727 sum2 = 0.0; 728 sum3 = 0.0; 729 sum4 = 0.0; 730 sum5 = 0.0; 731 for (j=0; j<n; j++) { 732 sum1 += v[jrow]*x[5*idx[jrow]]; 733 sum2 += v[jrow]*x[5*idx[jrow]+1]; 734 sum3 += v[jrow]*x[5*idx[jrow]+2]; 735 sum4 += v[jrow]*x[5*idx[jrow]+3]; 736 sum5 += v[jrow]*x[5*idx[jrow]+4]; 737 jrow++; 738 } 739 y[5*i] += sum1; 740 y[5*i+1] += sum2; 741 y[5*i+2] += sum3; 742 y[5*i+3] += sum4; 743 y[5*i+4] += sum5; 744 } 745 746 PetscLogFlops(10*a->nz); 747 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 748 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 754 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 755 { 756 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 757 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 758 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 759 PetscErrorCode ierr; 760 PetscInt m = b->AIJ->m,n,i,*idx; 761 762 PetscFunctionBegin; 763 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 764 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 765 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 766 767 for (i=0; i<m; i++) { 768 idx = a->j + a->i[i] ; 769 v = a->a + a->i[i] ; 770 n = a->i[i+1] - a->i[i]; 771 alpha1 = x[5*i]; 772 alpha2 = x[5*i+1]; 773 alpha3 = x[5*i+2]; 774 alpha4 = x[5*i+3]; 775 alpha5 = x[5*i+4]; 776 while (n-->0) { 777 y[5*(*idx)] += alpha1*(*v); 778 y[5*(*idx)+1] += alpha2*(*v); 779 y[5*(*idx)+2] += alpha3*(*v); 780 y[5*(*idx)+3] += alpha4*(*v); 781 y[5*(*idx)+4] += alpha5*(*v); 782 idx++; v++; 783 } 784 } 785 PetscLogFlops(10*a->nz); 786 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 787 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 788 PetscFunctionReturn(0); 789 } 790 791 /* ------------------------------------------------------------------------------*/ 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatMult_SeqMAIJ_6" 794 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 795 { 796 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 798 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 799 PetscErrorCode ierr; 800 PetscInt m = b->AIJ->m,*idx,*ii; 801 PetscInt n,i,jrow,j; 802 803 PetscFunctionBegin; 804 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 805 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 806 idx = a->j; 807 v = a->a; 808 ii = a->i; 809 810 for (i=0; i<m; i++) { 811 jrow = ii[i]; 812 n = ii[i+1] - jrow; 813 sum1 = 0.0; 814 sum2 = 0.0; 815 sum3 = 0.0; 816 sum4 = 0.0; 817 sum5 = 0.0; 818 sum6 = 0.0; 819 for (j=0; j<n; j++) { 820 sum1 += v[jrow]*x[6*idx[jrow]]; 821 sum2 += v[jrow]*x[6*idx[jrow]+1]; 822 sum3 += v[jrow]*x[6*idx[jrow]+2]; 823 sum4 += v[jrow]*x[6*idx[jrow]+3]; 824 sum5 += v[jrow]*x[6*idx[jrow]+4]; 825 sum6 += v[jrow]*x[6*idx[jrow]+5]; 826 jrow++; 827 } 828 y[6*i] = sum1; 829 y[6*i+1] = sum2; 830 y[6*i+2] = sum3; 831 y[6*i+3] = sum4; 832 y[6*i+4] = sum5; 833 y[6*i+5] = sum6; 834 } 835 836 PetscLogFlops(12*a->nz - 6*m); 837 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 838 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 844 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 845 { 846 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 847 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 848 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 849 PetscErrorCode ierr; 850 PetscInt m = b->AIJ->m,n,i,*idx; 851 852 PetscFunctionBegin; 853 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 856 857 for (i=0; i<m; i++) { 858 idx = a->j + a->i[i] ; 859 v = a->a + a->i[i] ; 860 n = a->i[i+1] - a->i[i]; 861 alpha1 = x[6*i]; 862 alpha2 = x[6*i+1]; 863 alpha3 = x[6*i+2]; 864 alpha4 = x[6*i+3]; 865 alpha5 = x[6*i+4]; 866 alpha6 = x[6*i+5]; 867 while (n-->0) { 868 y[6*(*idx)] += alpha1*(*v); 869 y[6*(*idx)+1] += alpha2*(*v); 870 y[6*(*idx)+2] += alpha3*(*v); 871 y[6*(*idx)+3] += alpha4*(*v); 872 y[6*(*idx)+4] += alpha5*(*v); 873 y[6*(*idx)+5] += alpha6*(*v); 874 idx++; v++; 875 } 876 } 877 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 885 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 886 { 887 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 888 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 889 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 890 PetscErrorCode ierr; 891 PetscInt m = b->AIJ->m,*idx,*ii; 892 PetscInt n,i,jrow,j; 893 894 PetscFunctionBegin; 895 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 896 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 897 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 898 idx = a->j; 899 v = a->a; 900 ii = a->i; 901 902 for (i=0; i<m; i++) { 903 jrow = ii[i]; 904 n = ii[i+1] - jrow; 905 sum1 = 0.0; 906 sum2 = 0.0; 907 sum3 = 0.0; 908 sum4 = 0.0; 909 sum5 = 0.0; 910 sum6 = 0.0; 911 for (j=0; j<n; j++) { 912 sum1 += v[jrow]*x[6*idx[jrow]]; 913 sum2 += v[jrow]*x[6*idx[jrow]+1]; 914 sum3 += v[jrow]*x[6*idx[jrow]+2]; 915 sum4 += v[jrow]*x[6*idx[jrow]+3]; 916 sum5 += v[jrow]*x[6*idx[jrow]+4]; 917 sum6 += v[jrow]*x[6*idx[jrow]+5]; 918 jrow++; 919 } 920 y[6*i] += sum1; 921 y[6*i+1] += sum2; 922 y[6*i+2] += sum3; 923 y[6*i+3] += sum4; 924 y[6*i+4] += sum5; 925 y[6*i+5] += sum6; 926 } 927 928 PetscLogFlops(12*a->nz); 929 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 930 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 936 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 937 { 938 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 939 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 940 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 941 PetscErrorCode ierr; 942 PetscInt m = b->AIJ->m,n,i,*idx; 943 944 PetscFunctionBegin; 945 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 946 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 947 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 948 949 for (i=0; i<m; i++) { 950 idx = a->j + a->i[i] ; 951 v = a->a + a->i[i] ; 952 n = a->i[i+1] - a->i[i]; 953 alpha1 = x[6*i]; 954 alpha2 = x[6*i+1]; 955 alpha3 = x[6*i+2]; 956 alpha4 = x[6*i+3]; 957 alpha5 = x[6*i+4]; 958 alpha6 = x[6*i+5]; 959 while (n-->0) { 960 y[6*(*idx)] += alpha1*(*v); 961 y[6*(*idx)+1] += alpha2*(*v); 962 y[6*(*idx)+2] += alpha3*(*v); 963 y[6*(*idx)+3] += alpha4*(*v); 964 y[6*(*idx)+4] += alpha5*(*v); 965 y[6*(*idx)+5] += alpha6*(*v); 966 idx++; v++; 967 } 968 } 969 PetscLogFlops(12*a->nz); 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 /* ------------------------------------------------------------------------------*/ 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatMult_SeqMAIJ_8" 978 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 979 { 980 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 981 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 982 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 983 PetscErrorCode ierr; 984 PetscInt m = b->AIJ->m,*idx,*ii; 985 PetscInt n,i,jrow,j; 986 987 PetscFunctionBegin; 988 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 989 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 990 idx = a->j; 991 v = a->a; 992 ii = a->i; 993 994 for (i=0; i<m; i++) { 995 jrow = ii[i]; 996 n = ii[i+1] - jrow; 997 sum1 = 0.0; 998 sum2 = 0.0; 999 sum3 = 0.0; 1000 sum4 = 0.0; 1001 sum5 = 0.0; 1002 sum6 = 0.0; 1003 sum7 = 0.0; 1004 sum8 = 0.0; 1005 for (j=0; j<n; j++) { 1006 sum1 += v[jrow]*x[8*idx[jrow]]; 1007 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1008 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1009 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1010 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1011 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1012 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1013 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1014 jrow++; 1015 } 1016 y[8*i] = sum1; 1017 y[8*i+1] = sum2; 1018 y[8*i+2] = sum3; 1019 y[8*i+3] = sum4; 1020 y[8*i+4] = sum5; 1021 y[8*i+5] = sum6; 1022 y[8*i+6] = sum7; 1023 y[8*i+7] = sum8; 1024 } 1025 1026 PetscLogFlops(16*a->nz - 8*m); 1027 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1028 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1034 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1035 { 1036 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1037 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1038 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1039 PetscErrorCode ierr; 1040 PetscInt m = b->AIJ->m,n,i,*idx; 1041 1042 PetscFunctionBegin; 1043 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1046 1047 for (i=0; i<m; i++) { 1048 idx = a->j + a->i[i] ; 1049 v = a->a + a->i[i] ; 1050 n = a->i[i+1] - a->i[i]; 1051 alpha1 = x[8*i]; 1052 alpha2 = x[8*i+1]; 1053 alpha3 = x[8*i+2]; 1054 alpha4 = x[8*i+3]; 1055 alpha5 = x[8*i+4]; 1056 alpha6 = x[8*i+5]; 1057 alpha7 = x[8*i+6]; 1058 alpha8 = x[8*i+7]; 1059 while (n-->0) { 1060 y[8*(*idx)] += alpha1*(*v); 1061 y[8*(*idx)+1] += alpha2*(*v); 1062 y[8*(*idx)+2] += alpha3*(*v); 1063 y[8*(*idx)+3] += alpha4*(*v); 1064 y[8*(*idx)+4] += alpha5*(*v); 1065 y[8*(*idx)+5] += alpha6*(*v); 1066 y[8*(*idx)+6] += alpha7*(*v); 1067 y[8*(*idx)+7] += alpha8*(*v); 1068 idx++; v++; 1069 } 1070 } 1071 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1079 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1080 { 1081 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1082 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1083 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1084 PetscErrorCode ierr; 1085 PetscInt m = b->AIJ->m,*idx,*ii; 1086 PetscInt n,i,jrow,j; 1087 1088 PetscFunctionBegin; 1089 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1090 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1091 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1092 idx = a->j; 1093 v = a->a; 1094 ii = a->i; 1095 1096 for (i=0; i<m; i++) { 1097 jrow = ii[i]; 1098 n = ii[i+1] - jrow; 1099 sum1 = 0.0; 1100 sum2 = 0.0; 1101 sum3 = 0.0; 1102 sum4 = 0.0; 1103 sum5 = 0.0; 1104 sum6 = 0.0; 1105 sum7 = 0.0; 1106 sum8 = 0.0; 1107 for (j=0; j<n; j++) { 1108 sum1 += v[jrow]*x[8*idx[jrow]]; 1109 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1110 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1111 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1112 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1113 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1114 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1115 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1116 jrow++; 1117 } 1118 y[8*i] += sum1; 1119 y[8*i+1] += sum2; 1120 y[8*i+2] += sum3; 1121 y[8*i+3] += sum4; 1122 y[8*i+4] += sum5; 1123 y[8*i+5] += sum6; 1124 y[8*i+6] += sum7; 1125 y[8*i+7] += sum8; 1126 } 1127 1128 PetscLogFlops(16*a->nz); 1129 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1130 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1136 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1137 { 1138 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1139 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1140 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1141 PetscErrorCode ierr; 1142 PetscInt m = b->AIJ->m,n,i,*idx; 1143 1144 PetscFunctionBegin; 1145 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1146 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1147 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1148 for (i=0; i<m; i++) { 1149 idx = a->j + a->i[i] ; 1150 v = a->a + a->i[i] ; 1151 n = a->i[i+1] - a->i[i]; 1152 alpha1 = x[8*i]; 1153 alpha2 = x[8*i+1]; 1154 alpha3 = x[8*i+2]; 1155 alpha4 = x[8*i+3]; 1156 alpha5 = x[8*i+4]; 1157 alpha6 = x[8*i+5]; 1158 alpha7 = x[8*i+6]; 1159 alpha8 = x[8*i+7]; 1160 while (n-->0) { 1161 y[8*(*idx)] += alpha1*(*v); 1162 y[8*(*idx)+1] += alpha2*(*v); 1163 y[8*(*idx)+2] += alpha3*(*v); 1164 y[8*(*idx)+3] += alpha4*(*v); 1165 y[8*(*idx)+4] += alpha5*(*v); 1166 y[8*(*idx)+5] += alpha6*(*v); 1167 y[8*(*idx)+6] += alpha7*(*v); 1168 y[8*(*idx)+7] += alpha8*(*v); 1169 idx++; v++; 1170 } 1171 } 1172 PetscLogFlops(16*a->nz); 1173 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1174 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 /* ------------------------------------------------------------------------------*/ 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1181 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1182 { 1183 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1185 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1186 PetscErrorCode ierr; 1187 PetscInt m = b->AIJ->m,*idx,*ii; 1188 PetscInt n,i,jrow,j; 1189 1190 PetscFunctionBegin; 1191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1192 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1193 idx = a->j; 1194 v = a->a; 1195 ii = a->i; 1196 1197 for (i=0; i<m; i++) { 1198 jrow = ii[i]; 1199 n = ii[i+1] - jrow; 1200 sum1 = 0.0; 1201 sum2 = 0.0; 1202 sum3 = 0.0; 1203 sum4 = 0.0; 1204 sum5 = 0.0; 1205 sum6 = 0.0; 1206 sum7 = 0.0; 1207 sum8 = 0.0; 1208 sum9 = 0.0; 1209 for (j=0; j<n; j++) { 1210 sum1 += v[jrow]*x[9*idx[jrow]]; 1211 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1212 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1213 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1214 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1215 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1216 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1217 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1218 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1219 jrow++; 1220 } 1221 y[9*i] = sum1; 1222 y[9*i+1] = sum2; 1223 y[9*i+2] = sum3; 1224 y[9*i+3] = sum4; 1225 y[9*i+4] = sum5; 1226 y[9*i+5] = sum6; 1227 y[9*i+6] = sum7; 1228 y[9*i+7] = sum8; 1229 y[9*i+8] = sum9; 1230 } 1231 1232 PetscLogFlops(18*a->nz - 9*m); 1233 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1234 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1240 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1241 { 1242 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1243 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1244 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1245 PetscErrorCode ierr; 1246 PetscInt m = b->AIJ->m,n,i,*idx; 1247 1248 PetscFunctionBegin; 1249 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1250 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1251 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1252 1253 for (i=0; i<m; i++) { 1254 idx = a->j + a->i[i] ; 1255 v = a->a + a->i[i] ; 1256 n = a->i[i+1] - a->i[i]; 1257 alpha1 = x[9*i]; 1258 alpha2 = x[9*i+1]; 1259 alpha3 = x[9*i+2]; 1260 alpha4 = x[9*i+3]; 1261 alpha5 = x[9*i+4]; 1262 alpha6 = x[9*i+5]; 1263 alpha7 = x[9*i+6]; 1264 alpha8 = x[9*i+7]; 1265 alpha9 = x[9*i+8]; 1266 while (n-->0) { 1267 y[9*(*idx)] += alpha1*(*v); 1268 y[9*(*idx)+1] += alpha2*(*v); 1269 y[9*(*idx)+2] += alpha3*(*v); 1270 y[9*(*idx)+3] += alpha4*(*v); 1271 y[9*(*idx)+4] += alpha5*(*v); 1272 y[9*(*idx)+5] += alpha6*(*v); 1273 y[9*(*idx)+6] += alpha7*(*v); 1274 y[9*(*idx)+7] += alpha8*(*v); 1275 y[9*(*idx)+8] += alpha9*(*v); 1276 idx++; v++; 1277 } 1278 } 1279 PetscLogFlops(18*a->nz - 9*b->AIJ->n); 1280 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1281 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1287 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1288 { 1289 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1291 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1292 PetscErrorCode ierr; 1293 PetscInt m = b->AIJ->m,*idx,*ii; 1294 PetscInt n,i,jrow,j; 1295 1296 PetscFunctionBegin; 1297 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1298 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1299 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1300 idx = a->j; 1301 v = a->a; 1302 ii = a->i; 1303 1304 for (i=0; i<m; i++) { 1305 jrow = ii[i]; 1306 n = ii[i+1] - jrow; 1307 sum1 = 0.0; 1308 sum2 = 0.0; 1309 sum3 = 0.0; 1310 sum4 = 0.0; 1311 sum5 = 0.0; 1312 sum6 = 0.0; 1313 sum7 = 0.0; 1314 sum8 = 0.0; 1315 sum9 = 0.0; 1316 for (j=0; j<n; j++) { 1317 sum1 += v[jrow]*x[9*idx[jrow]]; 1318 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1319 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1320 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1321 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1322 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1323 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1324 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1325 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1326 jrow++; 1327 } 1328 y[9*i] += sum1; 1329 y[9*i+1] += sum2; 1330 y[9*i+2] += sum3; 1331 y[9*i+3] += sum4; 1332 y[9*i+4] += sum5; 1333 y[9*i+5] += sum6; 1334 y[9*i+6] += sum7; 1335 y[9*i+7] += sum8; 1336 y[9*i+8] += sum9; 1337 } 1338 1339 PetscLogFlops(18*a->nz); 1340 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1341 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1347 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1348 { 1349 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1350 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1351 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1352 PetscErrorCode ierr; 1353 PetscInt m = b->AIJ->m,n,i,*idx; 1354 1355 PetscFunctionBegin; 1356 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1358 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1359 for (i=0; i<m; i++) { 1360 idx = a->j + a->i[i] ; 1361 v = a->a + a->i[i] ; 1362 n = a->i[i+1] - a->i[i]; 1363 alpha1 = x[9*i]; 1364 alpha2 = x[9*i+1]; 1365 alpha3 = x[9*i+2]; 1366 alpha4 = x[9*i+3]; 1367 alpha5 = x[9*i+4]; 1368 alpha6 = x[9*i+5]; 1369 alpha7 = x[9*i+6]; 1370 alpha8 = x[9*i+7]; 1371 alpha9 = x[9*i+8]; 1372 while (n-->0) { 1373 y[9*(*idx)] += alpha1*(*v); 1374 y[9*(*idx)+1] += alpha2*(*v); 1375 y[9*(*idx)+2] += alpha3*(*v); 1376 y[9*(*idx)+3] += alpha4*(*v); 1377 y[9*(*idx)+4] += alpha5*(*v); 1378 y[9*(*idx)+5] += alpha6*(*v); 1379 y[9*(*idx)+6] += alpha7*(*v); 1380 y[9*(*idx)+7] += alpha8*(*v); 1381 y[9*(*idx)+8] += alpha9*(*v); 1382 idx++; v++; 1383 } 1384 } 1385 PetscLogFlops(18*a->nz); 1386 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 /*--------------------------------------------------------------------------------------------*/ 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1394 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1395 { 1396 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1397 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1398 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1399 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1400 PetscErrorCode ierr; 1401 PetscInt m = b->AIJ->m,*idx,*ii; 1402 PetscInt n,i,jrow,j; 1403 1404 PetscFunctionBegin; 1405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1406 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1407 idx = a->j; 1408 v = a->a; 1409 ii = a->i; 1410 1411 for (i=0; i<m; i++) { 1412 jrow = ii[i]; 1413 n = ii[i+1] - jrow; 1414 sum1 = 0.0; 1415 sum2 = 0.0; 1416 sum3 = 0.0; 1417 sum4 = 0.0; 1418 sum5 = 0.0; 1419 sum6 = 0.0; 1420 sum7 = 0.0; 1421 sum8 = 0.0; 1422 sum9 = 0.0; 1423 sum10 = 0.0; 1424 sum11 = 0.0; 1425 sum12 = 0.0; 1426 sum13 = 0.0; 1427 sum14 = 0.0; 1428 sum15 = 0.0; 1429 sum16 = 0.0; 1430 for (j=0; j<n; j++) { 1431 sum1 += v[jrow]*x[16*idx[jrow]]; 1432 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1433 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1434 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1435 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1436 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1437 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1438 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1439 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1440 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1441 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1442 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1443 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1444 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1445 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1446 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1447 jrow++; 1448 } 1449 y[16*i] = sum1; 1450 y[16*i+1] = sum2; 1451 y[16*i+2] = sum3; 1452 y[16*i+3] = sum4; 1453 y[16*i+4] = sum5; 1454 y[16*i+5] = sum6; 1455 y[16*i+6] = sum7; 1456 y[16*i+7] = sum8; 1457 y[16*i+8] = sum9; 1458 y[16*i+9] = sum10; 1459 y[16*i+10] = sum11; 1460 y[16*i+11] = sum12; 1461 y[16*i+12] = sum13; 1462 y[16*i+13] = sum14; 1463 y[16*i+14] = sum15; 1464 y[16*i+15] = sum16; 1465 } 1466 1467 PetscLogFlops(32*a->nz - 16*m); 1468 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1469 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1475 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1476 { 1477 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1478 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1479 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1480 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1481 PetscErrorCode ierr; 1482 PetscInt m = b->AIJ->m,n,i,*idx; 1483 1484 PetscFunctionBegin; 1485 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1486 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1487 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1488 1489 for (i=0; i<m; i++) { 1490 idx = a->j + a->i[i] ; 1491 v = a->a + a->i[i] ; 1492 n = a->i[i+1] - a->i[i]; 1493 alpha1 = x[16*i]; 1494 alpha2 = x[16*i+1]; 1495 alpha3 = x[16*i+2]; 1496 alpha4 = x[16*i+3]; 1497 alpha5 = x[16*i+4]; 1498 alpha6 = x[16*i+5]; 1499 alpha7 = x[16*i+6]; 1500 alpha8 = x[16*i+7]; 1501 alpha9 = x[16*i+8]; 1502 alpha10 = x[16*i+9]; 1503 alpha11 = x[16*i+10]; 1504 alpha12 = x[16*i+11]; 1505 alpha13 = x[16*i+12]; 1506 alpha14 = x[16*i+13]; 1507 alpha15 = x[16*i+14]; 1508 alpha16 = x[16*i+15]; 1509 while (n-->0) { 1510 y[16*(*idx)] += alpha1*(*v); 1511 y[16*(*idx)+1] += alpha2*(*v); 1512 y[16*(*idx)+2] += alpha3*(*v); 1513 y[16*(*idx)+3] += alpha4*(*v); 1514 y[16*(*idx)+4] += alpha5*(*v); 1515 y[16*(*idx)+5] += alpha6*(*v); 1516 y[16*(*idx)+6] += alpha7*(*v); 1517 y[16*(*idx)+7] += alpha8*(*v); 1518 y[16*(*idx)+8] += alpha9*(*v); 1519 y[16*(*idx)+9] += alpha10*(*v); 1520 y[16*(*idx)+10] += alpha11*(*v); 1521 y[16*(*idx)+11] += alpha12*(*v); 1522 y[16*(*idx)+12] += alpha13*(*v); 1523 y[16*(*idx)+13] += alpha14*(*v); 1524 y[16*(*idx)+14] += alpha15*(*v); 1525 y[16*(*idx)+15] += alpha16*(*v); 1526 idx++; v++; 1527 } 1528 } 1529 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1530 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1531 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1537 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1538 { 1539 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1541 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1542 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1543 PetscErrorCode ierr; 1544 PetscInt m = b->AIJ->m,*idx,*ii; 1545 PetscInt n,i,jrow,j; 1546 1547 PetscFunctionBegin; 1548 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1549 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1550 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1551 idx = a->j; 1552 v = a->a; 1553 ii = a->i; 1554 1555 for (i=0; i<m; i++) { 1556 jrow = ii[i]; 1557 n = ii[i+1] - jrow; 1558 sum1 = 0.0; 1559 sum2 = 0.0; 1560 sum3 = 0.0; 1561 sum4 = 0.0; 1562 sum5 = 0.0; 1563 sum6 = 0.0; 1564 sum7 = 0.0; 1565 sum8 = 0.0; 1566 sum9 = 0.0; 1567 sum10 = 0.0; 1568 sum11 = 0.0; 1569 sum12 = 0.0; 1570 sum13 = 0.0; 1571 sum14 = 0.0; 1572 sum15 = 0.0; 1573 sum16 = 0.0; 1574 for (j=0; j<n; j++) { 1575 sum1 += v[jrow]*x[16*idx[jrow]]; 1576 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1577 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1578 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1579 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1580 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1581 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1582 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1583 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1584 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1585 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1586 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1587 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1588 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1589 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1590 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1591 jrow++; 1592 } 1593 y[16*i] += sum1; 1594 y[16*i+1] += sum2; 1595 y[16*i+2] += sum3; 1596 y[16*i+3] += sum4; 1597 y[16*i+4] += sum5; 1598 y[16*i+5] += sum6; 1599 y[16*i+6] += sum7; 1600 y[16*i+7] += sum8; 1601 y[16*i+8] += sum9; 1602 y[16*i+9] += sum10; 1603 y[16*i+10] += sum11; 1604 y[16*i+11] += sum12; 1605 y[16*i+12] += sum13; 1606 y[16*i+13] += sum14; 1607 y[16*i+14] += sum15; 1608 y[16*i+15] += sum16; 1609 } 1610 1611 PetscLogFlops(32*a->nz); 1612 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1613 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1619 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1620 { 1621 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1622 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1623 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1624 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1625 PetscErrorCode ierr; 1626 PetscInt m = b->AIJ->m,n,i,*idx; 1627 1628 PetscFunctionBegin; 1629 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1630 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1631 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1632 for (i=0; i<m; i++) { 1633 idx = a->j + a->i[i] ; 1634 v = a->a + a->i[i] ; 1635 n = a->i[i+1] - a->i[i]; 1636 alpha1 = x[16*i]; 1637 alpha2 = x[16*i+1]; 1638 alpha3 = x[16*i+2]; 1639 alpha4 = x[16*i+3]; 1640 alpha5 = x[16*i+4]; 1641 alpha6 = x[16*i+5]; 1642 alpha7 = x[16*i+6]; 1643 alpha8 = x[16*i+7]; 1644 alpha9 = x[16*i+8]; 1645 alpha10 = x[16*i+9]; 1646 alpha11 = x[16*i+10]; 1647 alpha12 = x[16*i+11]; 1648 alpha13 = x[16*i+12]; 1649 alpha14 = x[16*i+13]; 1650 alpha15 = x[16*i+14]; 1651 alpha16 = x[16*i+15]; 1652 while (n-->0) { 1653 y[16*(*idx)] += alpha1*(*v); 1654 y[16*(*idx)+1] += alpha2*(*v); 1655 y[16*(*idx)+2] += alpha3*(*v); 1656 y[16*(*idx)+3] += alpha4*(*v); 1657 y[16*(*idx)+4] += alpha5*(*v); 1658 y[16*(*idx)+5] += alpha6*(*v); 1659 y[16*(*idx)+6] += alpha7*(*v); 1660 y[16*(*idx)+7] += alpha8*(*v); 1661 y[16*(*idx)+8] += alpha9*(*v); 1662 y[16*(*idx)+9] += alpha10*(*v); 1663 y[16*(*idx)+10] += alpha11*(*v); 1664 y[16*(*idx)+11] += alpha12*(*v); 1665 y[16*(*idx)+12] += alpha13*(*v); 1666 y[16*(*idx)+13] += alpha14*(*v); 1667 y[16*(*idx)+14] += alpha15*(*v); 1668 y[16*(*idx)+15] += alpha16*(*v); 1669 idx++; v++; 1670 } 1671 } 1672 PetscLogFlops(32*a->nz); 1673 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1674 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 /*===================================================================================*/ 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1681 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1682 { 1683 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1684 PetscErrorCode ierr; 1685 1686 PetscFunctionBegin; 1687 /* start the scatter */ 1688 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1689 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1690 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1691 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1697 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1698 { 1699 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1704 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1705 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1706 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1712 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1713 { 1714 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 /* start the scatter */ 1719 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1720 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1721 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1722 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1728 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1729 { 1730 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1735 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1736 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1737 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 1743 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 1744 { 1745 /* This routine requires testing -- but it's getting better. */ 1746 PetscErrorCode ierr; 1747 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1748 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 1749 Mat P=pp->AIJ; 1750 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 1751 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 1752 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 1753 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn; 1754 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 1755 MatScalar *ca; 1756 1757 PetscFunctionBegin; 1758 /* Start timer */ 1759 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 1760 1761 /* Get ij structure of P^T */ 1762 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1763 1764 cn = pn*ppdof; 1765 /* Allocate ci array, arrays for fill computation and */ 1766 /* free space for accumulating nonzero column info */ 1767 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1768 ci[0] = 0; 1769 1770 /* Work arrays for rows of P^T*A */ 1771 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 1772 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1773 ptasparserow = ptadenserow + an; 1774 denserow = ptasparserow + an; 1775 sparserow = denserow + cn; 1776 1777 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1778 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1779 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 1780 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 1781 current_space = free_space; 1782 1783 /* Determine symbolic info for each row of C: */ 1784 for (i=0;i<pn;i++) { 1785 ptnzi = pti[i+1] - pti[i]; 1786 ptJ = ptj + pti[i]; 1787 for (dof=0;dof<ppdof;dof++) { 1788 ptanzi = 0; 1789 /* Determine symbolic row of PtA: */ 1790 for (j=0;j<ptnzi;j++) { 1791 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 1792 arow = ptJ[j]*ppdof + dof; 1793 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 1794 anzj = ai[arow+1] - ai[arow]; 1795 ajj = aj + ai[arow]; 1796 for (k=0;k<anzj;k++) { 1797 if (!ptadenserow[ajj[k]]) { 1798 ptadenserow[ajj[k]] = -1; 1799 ptasparserow[ptanzi++] = ajj[k]; 1800 } 1801 } 1802 } 1803 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1804 ptaj = ptasparserow; 1805 cnzi = 0; 1806 for (j=0;j<ptanzi;j++) { 1807 /* Get offset within block of P */ 1808 pshift = *ptaj%ppdof; 1809 /* Get block row of P */ 1810 prow = (*ptaj++)/ppdof; /* integer division */ 1811 /* P has same number of nonzeros per row as the compressed form */ 1812 pnzj = pi[prow+1] - pi[prow]; 1813 pjj = pj + pi[prow]; 1814 for (k=0;k<pnzj;k++) { 1815 /* Locations in C are shifted by the offset within the block */ 1816 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 1817 if (!denserow[pjj[k]*ppdof+pshift]) { 1818 denserow[pjj[k]*ppdof+pshift] = -1; 1819 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 1820 } 1821 } 1822 } 1823 1824 /* sort sparserow */ 1825 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 1826 1827 /* If free space is not available, make more free space */ 1828 /* Double the amount of total space in the list */ 1829 if (current_space->local_remaining<cnzi) { 1830 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1831 } 1832 1833 /* Copy data into free space, and zero out denserows */ 1834 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 1835 current_space->array += cnzi; 1836 current_space->local_used += cnzi; 1837 current_space->local_remaining -= cnzi; 1838 1839 for (j=0;j<ptanzi;j++) { 1840 ptadenserow[ptasparserow[j]] = 0; 1841 } 1842 for (j=0;j<cnzi;j++) { 1843 denserow[sparserow[j]] = 0; 1844 } 1845 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1846 /* For now, we will recompute what is needed. */ 1847 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 1848 } 1849 } 1850 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1851 /* Allocate space for cj, initialize cj, and */ 1852 /* destroy list of free space and other temporary array(s) */ 1853 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1854 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1855 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 1856 1857 /* Allocate space for ca */ 1858 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1859 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1860 1861 /* put together the new matrix */ 1862 ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 1863 1864 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1865 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1866 c = (Mat_SeqAIJ *)((*C)->data); 1867 c->freedata = PETSC_TRUE; 1868 c->nonew = 0; 1869 1870 /* Clean up. */ 1871 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1872 1873 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 1879 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 1880 { 1881 /* This routine requires testing -- first draft only */ 1882 PetscErrorCode ierr; 1883 PetscInt flops=0; 1884 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 1885 Mat P=pp->AIJ; 1886 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 1887 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 1888 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 1889 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 1890 PetscInt *ci=c->i,*cj=c->j,*cjj; 1891 PetscInt am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof; 1892 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1893 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 1894 1895 PetscFunctionBegin; 1896 /* Allocate temporary array for storage of one row of A*P */ 1897 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1898 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1899 1900 apj = (PetscInt *)(apa + cn); 1901 apjdense = apj + cn; 1902 1903 /* Clear old values in C */ 1904 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1905 1906 for (i=0;i<am;i++) { 1907 /* Form sparse row of A*P */ 1908 anzi = ai[i+1] - ai[i]; 1909 apnzj = 0; 1910 for (j=0;j<anzi;j++) { 1911 /* Get offset within block of P */ 1912 pshift = *aj%ppdof; 1913 /* Get block row of P */ 1914 prow = *aj++/ppdof; /* integer division */ 1915 pnzj = pi[prow+1] - pi[prow]; 1916 pjj = pj + pi[prow]; 1917 paj = pa + pi[prow]; 1918 for (k=0;k<pnzj;k++) { 1919 poffset = pjj[k]*ppdof+pshift; 1920 if (!apjdense[poffset]) { 1921 apjdense[poffset] = -1; 1922 apj[apnzj++] = poffset; 1923 } 1924 apa[poffset] += (*aa)*paj[k]; 1925 } 1926 flops += 2*pnzj; 1927 aa++; 1928 } 1929 1930 /* Sort the j index array for quick sparse axpy. */ 1931 /* Note: a array does not need sorting as it is in dense storage locations. */ 1932 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1933 1934 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 1935 prow = i/ppdof; /* integer division */ 1936 pshift = i%ppdof; 1937 poffset = pi[prow]; 1938 pnzi = pi[prow+1] - poffset; 1939 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 1940 pJ = pj+poffset; 1941 pA = pa+poffset; 1942 for (j=0;j<pnzi;j++) { 1943 crow = (*pJ)*ppdof+pshift; 1944 cjj = cj + ci[crow]; 1945 caj = ca + ci[crow]; 1946 pJ++; 1947 /* Perform sparse axpy operation. Note cjj includes apj. */ 1948 for (k=0,nextap=0;nextap<apnzj;k++) { 1949 if (cjj[k]==apj[nextap]) { 1950 caj[k] += (*pA)*apa[apj[nextap++]]; 1951 } 1952 } 1953 flops += 2*apnzj; 1954 pA++; 1955 } 1956 1957 /* Zero the current row info for A*P */ 1958 for (j=0;j<apnzj;j++) { 1959 apa[apj[j]] = 0.; 1960 apjdense[apj[j]] = 0; 1961 } 1962 } 1963 1964 /* Assemble the final matrix and clean up */ 1965 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1966 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = PetscFree(apa);CHKERRQ(ierr); 1968 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1969 1970 PetscFunctionReturn(0); 1971 } 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 1975 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 1976 { 1977 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1978 Mat a = b->AIJ,B; 1979 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 1980 PetscErrorCode ierr; 1981 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 1982 PetscInt *cols; 1983 PetscScalar *vals; 1984 1985 PetscFunctionBegin; 1986 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 1987 ierr = PetscMalloc(dof*m*sizeof(int),&ilen);CHKERRQ(ierr); 1988 for (i=0; i<m; i++) { 1989 nmax = PetscMax(nmax,aij->ilen[i]); 1990 for (j=0; j<dof; j++) { 1991 ilen[dof*i+j] = aij->ilen[i]; 1992 } 1993 } 1994 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 1995 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1996 ierr = PetscFree(ilen);CHKERRQ(ierr); 1997 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 1998 ii = 0; 1999 for (i=0; i<m; i++) { 2000 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2001 for (j=0; j<dof; j++) { 2002 for (k=0; k<ncols; k++) { 2003 icols[k] = dof*cols[k]+j; 2004 } 2005 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2006 ii++; 2007 } 2008 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2009 } 2010 ierr = PetscFree(icols);CHKERRQ(ierr); 2011 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2012 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 2014 if (reuse == MAT_REUSE_MATRIX) { 2015 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 2016 } else { 2017 *newmat = B; 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #include "src/mat/impls/aij/mpi/mpiaij.h" 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2025 PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 2026 { 2027 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2028 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2029 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2030 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2031 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2032 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2033 PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols; 2034 PetscInt rstart,cstart,*garray,ii,k; 2035 PetscErrorCode ierr; 2036 PetscScalar *vals,*ovals; 2037 2038 PetscFunctionBegin; 2039 ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr); 2040 for (i=0; i<A->m/dof; i++) { 2041 nmax = PetscMax(nmax,AIJ->ilen[i]); 2042 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2043 for (j=0; j<dof; j++) { 2044 dnz[dof*i+j] = AIJ->ilen[i]; 2045 onz[dof*i+j] = OAIJ->ilen[i]; 2046 } 2047 } 2048 ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2049 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2050 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2051 2052 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2053 rstart = dof*mpiaij->rstart; 2054 cstart = dof*mpiaij->cstart; 2055 garray = mpiaij->garray; 2056 2057 ii = rstart; 2058 for (i=0; i<A->m/dof; i++) { 2059 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2060 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2061 for (j=0; j<dof; j++) { 2062 for (k=0; k<ncols; k++) { 2063 icols[k] = cstart + dof*cols[k]+j; 2064 } 2065 for (k=0; k<oncols; k++) { 2066 oicols[k] = dof*garray[ocols[k]]+j; 2067 } 2068 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2069 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2070 ii++; 2071 } 2072 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2073 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2074 } 2075 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 2080 if (reuse == MAT_REUSE_MATRIX) { 2081 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 2082 } else { 2083 *newmat = B; 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 2089 2090 /* ---------------------------------------------------------------------------------- */ 2091 /*MC 2092 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2093 operations for multicomponent problems. It interpolates each component the same 2094 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2095 and MATMPIAIJ for distributed matrices. 2096 2097 Operations provided: 2098 + MatMult 2099 . MatMultTranspose 2100 . MatMultAdd 2101 . MatMultTransposeAdd 2102 - MatView 2103 2104 Level: advanced 2105 2106 M*/ 2107 #undef __FUNCT__ 2108 #define __FUNCT__ "MatCreateMAIJ" 2109 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2110 { 2111 PetscErrorCode ierr; 2112 PetscMPIInt size; 2113 PetscInt n; 2114 Mat_MPIMAIJ *b; 2115 Mat B; 2116 2117 PetscFunctionBegin; 2118 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2119 2120 if (dof == 1) { 2121 *maij = A; 2122 } else { 2123 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 2124 B->assembled = PETSC_TRUE; 2125 2126 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2127 if (size == 1) { 2128 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2129 B->ops->destroy = MatDestroy_SeqMAIJ; 2130 B->ops->view = MatView_SeqMAIJ; 2131 b = (Mat_MPIMAIJ*)B->data; 2132 b->dof = dof; 2133 b->AIJ = A; 2134 if (dof == 2) { 2135 B->ops->mult = MatMult_SeqMAIJ_2; 2136 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2137 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2138 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2139 } else if (dof == 3) { 2140 B->ops->mult = MatMult_SeqMAIJ_3; 2141 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2142 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2143 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2144 } else if (dof == 4) { 2145 B->ops->mult = MatMult_SeqMAIJ_4; 2146 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2147 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2148 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2149 } else if (dof == 5) { 2150 B->ops->mult = MatMult_SeqMAIJ_5; 2151 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2152 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2153 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2154 } else if (dof == 6) { 2155 B->ops->mult = MatMult_SeqMAIJ_6; 2156 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2157 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2158 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2159 } else if (dof == 8) { 2160 B->ops->mult = MatMult_SeqMAIJ_8; 2161 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2162 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2163 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2164 } else if (dof == 9) { 2165 B->ops->mult = MatMult_SeqMAIJ_9; 2166 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2167 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2168 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2169 } else if (dof == 16) { 2170 B->ops->mult = MatMult_SeqMAIJ_16; 2171 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2172 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2173 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2174 } else { 2175 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2176 } 2177 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2178 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2179 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2180 } else { 2181 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2182 IS from,to; 2183 Vec gvec; 2184 PetscInt *garray,i; 2185 2186 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2187 B->ops->destroy = MatDestroy_MPIMAIJ; 2188 B->ops->view = MatView_MPIMAIJ; 2189 b = (Mat_MPIMAIJ*)B->data; 2190 b->dof = dof; 2191 b->A = A; 2192 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2193 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2194 2195 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2196 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2197 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2198 2199 /* create two temporary Index sets for build scatter gather */ 2200 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2201 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2202 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2203 ierr = PetscFree(garray);CHKERRQ(ierr); 2204 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2205 2206 /* create temporary global vector to generate scatter context */ 2207 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 2208 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2209 2210 /* generate the scatter context */ 2211 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2212 2213 ierr = ISDestroy(from);CHKERRQ(ierr); 2214 ierr = ISDestroy(to);CHKERRQ(ierr); 2215 ierr = VecDestroy(gvec);CHKERRQ(ierr); 2216 2217 B->ops->mult = MatMult_MPIMAIJ_dof; 2218 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 2219 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 2220 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 2221 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2222 } 2223 *maij = B; 2224 ierr = MatView_Private(B);CHKERRQ(ierr); 2225 } 2226 PetscFunctionReturn(0); 2227 } 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240