1 static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2 "This test can only be run in parallel.\n" 3 "\n"; 4 5 /*T 6 Concepts: Mat^mat submatrix, parallel 7 Processors: n 8 T*/ 9 10 11 #include <petscmat.h> 12 13 int main(int argc, char **args) 14 { 15 Mat A,*submats; 16 MPI_Comm subcomm; 17 PetscMPIInt rank,size,subrank,subsize,color; 18 PetscInt m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1; 19 PetscInt nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs; 20 PetscInt *rowindices,*colindices,idx,rep; 21 PetscScalar *vals; 22 IS rowis[1],colis[1]; 23 PetscViewer viewer; 24 PetscBool permute_indices,flg; 25 PetscErrorCode ierr; 26 27 ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 28 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 29 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 30 31 ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat");CHKERRQ(ierr); 32 m = 5; 33 ierr = PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg);CHKERRQ(ierr); 34 total_subdomains = size-1; 35 ierr = PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg);CHKERRQ(ierr); 36 permute_indices = PETSC_FALSE; 37 ierr = PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg);CHKERRQ(ierr); 38 hash = 7; 39 ierr = PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg);CHKERRQ(ierr); 40 rep = 2; 41 ierr = PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg);CHKERRQ(ierr); 42 ierr = PetscOptionsEnd();CHKERRQ(ierr); 43 44 if (total_subdomains > size) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %D must not exceed comm size %D",total_subdomains,size); 45 if (total_subdomains < 1 || total_subdomains > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %D (comm size), got total_subdomains = %D",size,total_subdomains); 46 if (rep != 1 && rep != 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %D; must be 1 or 2",rep); 47 48 viewer = PETSC_VIEWER_STDOUT_WORLD; 49 /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 50 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 51 ierr = MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 52 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 53 ierr = MatSetUp(A);CHKERRQ(ierr); 54 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 55 ierr = MatGetLocalSize(A,NULL,&n);CHKERRQ(ierr); 56 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 57 ierr = MatSeqAIJSetPreallocation(A,n,NULL);CHKERRQ(ierr); 58 ierr = MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL);CHKERRQ(ierr); 59 ierr = MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr); 60 ierr = MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr); 61 ierr = MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr); 62 ierr = MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr); 63 64 ierr = PetscMalloc2(N,&cols,N,&vals);CHKERRQ(ierr); 65 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 66 for (j = 0; j < N; ++j) cols[j] = j; 67 for (i=rstart; i<rend; i++) { 68 for (j=0;j<N;++j) { 69 vals[j] = i*10000+j; 70 } 71 ierr = MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 72 } 73 ierr = PetscFree2(cols,vals);CHKERRQ(ierr); 74 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 75 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 76 77 ierr = PetscViewerASCIIPrintf(viewer,"Initial matrix:\n");CHKERRQ(ierr); 78 ierr = MatView(A,viewer);CHKERRQ(ierr); 79 80 81 /* 82 Create subcomms and ISs so that each rank participates in one IS. 83 The IS either coalesces adjacent rank indices (contiguous), 84 or selects indices by scrambling them using a hash. 85 */ 86 k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */ 87 color = rank/k; 88 ierr = MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm);CHKERRQ(ierr); 89 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 90 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 91 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 92 nis = 1; 93 ierr = PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices);CHKERRQ(ierr); 94 95 for (j = rstart; j < rend; ++j) { 96 if (permute_indices) { 97 idx = (j*hash); 98 } else { 99 idx = j; 100 } 101 rowindices[j-rstart] = idx%N; 102 colindices[j-rstart] = (idx+m)%N; 103 } 104 ierr = ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0]);CHKERRQ(ierr); 105 ierr = ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0]);CHKERRQ(ierr); 106 ierr = ISSort(rowis[0]);CHKERRQ(ierr); 107 ierr = ISSort(colis[0]);CHKERRQ(ierr); 108 ierr = PetscFree2(rowindices,colindices);CHKERRQ(ierr); 109 /* 110 Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 111 we need to obtain the global numbers of our local objects and wait for the corresponding global 112 number to be viewed. 113 */ 114 ierr = PetscViewerASCIIPrintf(viewer,"Subdomains");CHKERRQ(ierr); 115 if (permute_indices) { 116 ierr = PetscViewerASCIIPrintf(viewer," (hash=%D)",hash);CHKERRQ(ierr); 117 } 118 ierr = PetscViewerASCIIPrintf(viewer,":\n");CHKERRQ(ierr); 119 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 120 121 nsubdomains = 1; 122 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 123 ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 124 ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 125 for (gs=0,s=0; gs < gnsubdomains;++gs) { 126 if (s < nsubdomains) { 127 PetscInt ss; 128 ss = gsubdomainperm[s]; 129 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 130 PetscViewer subviewer = NULL; 131 ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr); 132 ierr = PetscViewerASCIIPrintf(subviewer,"Row IS %D\n",gs);CHKERRQ(ierr); 133 ierr = ISView(rowis[ss],subviewer);CHKERRQ(ierr); 134 ierr = PetscViewerFlush(subviewer);CHKERRQ(ierr); 135 ierr = PetscViewerASCIIPrintf(subviewer,"Col IS %D\n",gs);CHKERRQ(ierr); 136 ierr = ISView(colis[ss],subviewer);CHKERRQ(ierr); 137 ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr); 138 ++s; 139 } 140 } 141 ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); 142 } 143 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 144 ierr = ISSort(rowis[0]);CHKERRQ(ierr); 145 ierr = ISSort(colis[0]);CHKERRQ(ierr); 146 nsubdomains = 1; 147 ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr); 148 /* 149 Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 150 we need to obtain the global numbers of our local objects and wait for the corresponding global 151 number to be viewed. 152 */ 153 ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n");CHKERRQ(ierr); 154 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 155 ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 156 ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 157 for (gs=0,s=0; gs < gnsubdomains;++gs) { 158 if (s < nsubdomains) { 159 PetscInt ss; 160 ss = gsubdomainperm[s]; 161 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 162 PetscViewer subviewer = NULL; 163 ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 164 ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr); 165 ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 166 ++s; 167 } 168 } 169 ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); 170 } 171 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 172 if (rep == 1) goto cleanup; 173 nsubdomains = 1; 174 ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats);CHKERRQ(ierr); 175 /* 176 Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 177 we need to obtain the global numbers of our local objects and wait for the corresponding global 178 number to be viewed. 179 */ 180 ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n");CHKERRQ(ierr); 181 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 182 ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 183 ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 184 for (gs=0,s=0; gs < gnsubdomains;++gs) { 185 if (s < nsubdomains) { 186 PetscInt ss; 187 ss = gsubdomainperm[s]; 188 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 189 PetscViewer subviewer = NULL; 190 ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 191 ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr); 192 ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 193 ++s; 194 } 195 } 196 ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); 197 } 198 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 199 cleanup: 200 for (k=0;k<nsubdomains;++k) { 201 ierr = MatDestroy(submats+k);CHKERRQ(ierr); 202 } 203 ierr = PetscFree(submats);CHKERRQ(ierr); 204 for (k=0;k<nis;++k) { 205 ierr = ISDestroy(rowis+k);CHKERRQ(ierr); 206 ierr = ISDestroy(colis+k);CHKERRQ(ierr); 207 } 208 ierr = MatDestroy(&A);CHKERRQ(ierr); 209 ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr); 210 ierr = PetscFinalize(); 211 return ierr; 212 } 213 214 215 /*TEST 216 217 test: 218 nsize: 2 219 args: -total_subdomains 1 220 output_file: output/ex183_2_1.out 221 222 test: 223 suffix: 2 224 nsize: 3 225 args: -total_subdomains 2 226 output_file: output/ex183_3_2.out 227 228 test: 229 suffix: 3 230 nsize: 4 231 args: -total_subdomains 2 232 output_file: output/ex183_4_2.out 233 234 test: 235 suffix: 4 236 nsize: 6 237 args: -total_subdomains 2 238 output_file: output/ex183_6_2.out 239 240 TEST*/ 241