#include <../src/mat/impls/baij/mpi/mpibaij.h>

MatGetMultiProcBlock_MPIBAIJ(Mat mat,MPI_Comm subComm,MatReuse scall,Mat * subMat)3d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetMultiProcBlock_MPIBAIJ(Mat mat, MPI_Comm subComm, MatReuse scall, Mat *subMat)
4d71ae5a4SJacob Faibussowitsch {
5e8271787SHong Zhang Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
6e8271787SHong Zhang Mat_SeqBAIJ *aijB = (Mat_SeqBAIJ *)aij->B->data;
7e8271787SHong Zhang PetscMPIInt commRank, subCommSize, subCommRank;
8e8271787SHong Zhang PetscMPIInt *commRankMap, subRank, rank, commsize;
902e9429cSHong Zhang PetscInt *garrayCMap, col, i, j, *nnz, newRow, newCol, *newbRow, *newbCol, k, k1;
10e8271787SHong Zhang PetscInt bs = mat->rmap->bs;
1102e9429cSHong Zhang PetscScalar *vals, *aijBvals;
12e8271787SHong Zhang
13e8271787SHong Zhang PetscFunctionBegin;
149566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &commsize));
159566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(subComm, &subCommSize));
16e8271787SHong Zhang
17a5b23f4aSJose E. Roman /* create subMat object with the relevant layout */
18e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) {
199566063dSJacob Faibussowitsch PetscCall(MatCreate(subComm, subMat));
209566063dSJacob Faibussowitsch PetscCall(MatSetType(*subMat, MATMPIBAIJ));
219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*subMat, mat->rmap->n, mat->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
229566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*subMat, mat->rmap->bs, mat->cmap->bs));
23e8271787SHong Zhang
24e8271787SHong Zhang /* need to setup rmap and cmap before Preallocation */
259566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize((*subMat)->rmap, mat->rmap->bs));
269566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize((*subMat)->cmap, mat->cmap->bs));
279566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*subMat)->rmap));
289566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*subMat)->cmap));
29e8271787SHong Zhang }
30e8271787SHong Zhang
31e8271787SHong Zhang /* create a map of comm_rank from subComm to comm - should commRankMap and garrayCMap be kept for reused? */
329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &commRank));
339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subComm, &subCommRank));
349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(subCommSize, &commRankMap));
359566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgather(&commRank, 1, MPI_INT, commRankMap, 1, MPI_INT, subComm));
36e8271787SHong Zhang
37e8271787SHong Zhang /* Traverse garray and identify blocked column indices [of offdiag mat] that
38e8271787SHong Zhang should be discarded. For the ones not discarded, store the newCol+1
39e8271787SHong Zhang value in garrayCMap */
409566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(aij->B->cmap->n / bs, &garrayCMap));
41e8271787SHong Zhang for (i = 0; i < aij->B->cmap->n / bs; i++) {
42e8271787SHong Zhang col = aij->garray[i]; /* blocked column index */
43e8271787SHong Zhang for (subRank = 0; subRank < subCommSize; subRank++) {
44e8271787SHong Zhang rank = commRankMap[subRank];
45e8271787SHong Zhang if ((col >= mat->cmap->range[rank] / bs) && (col < mat->cmap->range[rank + 1] / bs)) {
4602e9429cSHong Zhang garrayCMap[i] = (((*subMat)->cmap->range[subRank] - mat->cmap->range[rank]) / bs + col + 1);
47e8271787SHong Zhang break;
48e8271787SHong Zhang }
49e8271787SHong Zhang }
50e8271787SHong Zhang }
51e8271787SHong Zhang
52e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) {
53e8271787SHong Zhang /* Now compute preallocation for the offdiag mat */
549566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(aij->B->rmap->n / bs, &nnz));
55e8271787SHong Zhang for (i = 0; i < aij->B->rmap->n / bs; i++) {
56e8271787SHong Zhang for (j = aijB->i[i]; j < aijB->i[i + 1]; j++) {
57e8271787SHong Zhang if (garrayCMap[aijB->j[j]]) nnz[i]++;
58e8271787SHong Zhang }
59e8271787SHong Zhang }
609566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(*(subMat), bs, 0, NULL, 0, nnz));
61e8271787SHong Zhang
62e8271787SHong Zhang /* reuse diag block with the new submat */
639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&((Mat_MPIBAIJ *)((*subMat)->data))->A));
64e8271787SHong Zhang
65e8271787SHong Zhang ((Mat_MPIBAIJ *)((*subMat)->data))->A = aij->A;
66e8271787SHong Zhang
679566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)aij->A));
68e8271787SHong Zhang } else if (((Mat_MPIBAIJ *)(*subMat)->data)->A != aij->A) {
69e8271787SHong Zhang PetscObject obj = (PetscObject)((Mat_MPIBAIJ *)((*subMat)->data))->A;
70e8271787SHong Zhang
71*835f2295SStefano Zampini PetscCall(PetscObjectReference(obj));
72e8271787SHong Zhang
73e8271787SHong Zhang ((Mat_MPIBAIJ *)((*subMat)->data))->A = aij->A;
74e8271787SHong Zhang
759566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)aij->A));
76e8271787SHong Zhang }
77e8271787SHong Zhang
78e8271787SHong Zhang /* Now traverse aij->B and insert values into subMat */
799f0612e4SBarry Smith /* Does not need PetscShmgetAllocateArray() since temporary */
809566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(bs, &newbRow, bs, &newbCol, bs * bs, &vals));
81e8271787SHong Zhang for (i = 0; i < aij->B->rmap->n / bs; i++) {
82e8271787SHong Zhang newRow = (*subMat)->rmap->range[subCommRank] + i * bs;
83e8271787SHong Zhang for (j = aijB->i[i]; j < aijB->i[i + 1]; j++) {
84e8271787SHong Zhang newCol = garrayCMap[aijB->j[j]];
85e8271787SHong Zhang if (newCol) {
86e8271787SHong Zhang newCol--; /* remove the increment */
87e8271787SHong Zhang newCol *= bs;
88e8271787SHong Zhang for (k = 0; k < bs; k++) {
89e8271787SHong Zhang newbRow[k] = newRow + k;
90e8271787SHong Zhang newbCol[k] = newCol + k;
91e8271787SHong Zhang }
92e8271787SHong Zhang /* copy column-oriented aijB->a into row-oriented vals */
9302e9429cSHong Zhang aijBvals = aijB->a + j * bs * bs;
94e8271787SHong Zhang for (k1 = 0; k1 < bs; k1++) {
95ad540459SPierre Jolivet for (k = 0; k < bs; k++) vals[k1 + k * bs] = *aijBvals++;
96e8271787SHong Zhang }
979566063dSJacob Faibussowitsch PetscCall(MatSetValues(*subMat, bs, newbRow, bs, newbCol, vals, INSERT_VALUES));
98e8271787SHong Zhang }
99e8271787SHong Zhang }
100e8271787SHong Zhang }
1019566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*subMat, MAT_FINAL_ASSEMBLY));
1029566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*subMat, MAT_FINAL_ASSEMBLY));
103e8271787SHong Zhang
104e8271787SHong Zhang /* deallocate temporary data */
1059566063dSJacob Faibussowitsch PetscCall(PetscFree3(newbRow, newbCol, vals));
1069566063dSJacob Faibussowitsch PetscCall(PetscFree(commRankMap));
1079566063dSJacob Faibussowitsch PetscCall(PetscFree(garrayCMap));
10848a46eb9SPierre Jolivet if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscFree(nnz));
1093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
110e8271787SHong Zhang }
111