1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 Input parameter: 30 A - the matrix 31 x - the vector on which FDFT will be performed 32 33 Output parameter: 34 y - vector that stores result of FDFT 35 */ 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMult_SeqFFTW" 38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39 { 40 PetscErrorCode ierr; 41 Mat_FFT *fft = (Mat_FFT*)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43 PetscScalar *x_array,*y_array; 44 PetscInt ndim=fft->ndim,*dim=fft->dim; 45 46 PetscFunctionBegin; 47 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49 if (!fftw->p_forward) { /* create a plan, then excute it */ 50 switch (ndim) { 51 case 1: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 2: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 case 3: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 default: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 } 80 fftw->finarray = x_array; 81 fftw->foutarray = y_array; 82 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 84 fftw_execute(fftw->p_forward); 85 } else { /* use existing plan */ 86 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 87 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88 } else { 89 fftw_execute(fftw->p_forward); 90 } 91 } 92 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 /* MatMultTranspose_SeqFFTW performs serial backward DFT 98 Input parameter: 99 A - the matrix 100 x - the vector on which BDFT will be performed 101 102 Output parameter: 103 y - vector that stores result of BDFT 104 */ 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109 { 110 PetscErrorCode ierr; 111 Mat_FFT *fft = (Mat_FFT*)A->data; 112 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113 PetscScalar *x_array,*y_array; 114 PetscInt ndim=fft->ndim,*dim=fft->dim; 115 116 PetscFunctionBegin; 117 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119 if (!fftw->p_backward) { /* create a plan, then excute it */ 120 switch (ndim) { 121 case 1: 122 #if defined(PETSC_USE_COMPLEX) 123 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 124 #else 125 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 126 #endif 127 break; 128 case 2: 129 #if defined(PETSC_USE_COMPLEX) 130 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 131 #else 132 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 133 #endif 134 break; 135 case 3: 136 #if defined(PETSC_USE_COMPLEX) 137 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 138 #else 139 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 140 #endif 141 break; 142 default: 143 #if defined(PETSC_USE_COMPLEX) 144 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 145 #else 146 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 147 #endif 148 break; 149 } 150 fftw->binarray = x_array; 151 fftw->boutarray = y_array; 152 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153 } else { /* use existing plan */ 154 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 155 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156 } else { 157 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158 } 159 } 160 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /* MatMult_MPIFFTW performs forward DFT in parallel 166 Input parameter: 167 A - the matrix 168 x - the vector on which FDFT will be performed 169 170 Output parameter: 171 y - vector that stores result of FDFT 172 */ 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatMult_MPIFFTW" 175 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176 { 177 PetscErrorCode ierr; 178 Mat_FFT *fft = (Mat_FFT*)A->data; 179 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180 PetscScalar *x_array,*y_array; 181 PetscInt ndim=fft->ndim,*dim=fft->dim; 182 MPI_Comm comm; 183 184 PetscFunctionBegin; 185 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 186 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 187 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 188 if (!fftw->p_forward) { /* create a plan, then excute it */ 189 switch (ndim) { 190 case 1: 191 #if defined(PETSC_USE_COMPLEX) 192 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 193 #else 194 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 195 #endif 196 break; 197 case 2: 198 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 199 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 200 #else 201 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 202 #endif 203 break; 204 case 3: 205 #if defined(PETSC_USE_COMPLEX) 206 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 207 #else 208 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 209 #endif 210 break; 211 default: 212 #if defined(PETSC_USE_COMPLEX) 213 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 214 #else 215 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 216 #endif 217 break; 218 } 219 fftw->finarray = x_array; 220 fftw->foutarray = y_array; 221 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 222 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 223 fftw_execute(fftw->p_forward); 224 } else { /* use existing plan */ 225 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 226 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 227 } else { 228 fftw_execute(fftw->p_forward); 229 } 230 } 231 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 232 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 236 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 237 Input parameter: 238 A - the matrix 239 x - the vector on which BDFT will be performed 240 241 Output parameter: 242 y - vector that stores result of BDFT 243 */ 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 246 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 247 { 248 PetscErrorCode ierr; 249 Mat_FFT *fft = (Mat_FFT*)A->data; 250 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 251 PetscScalar *x_array,*y_array; 252 PetscInt ndim=fft->ndim,*dim=fft->dim; 253 MPI_Comm comm; 254 255 PetscFunctionBegin; 256 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 257 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 258 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 259 if (!fftw->p_backward) { /* create a plan, then excute it */ 260 switch (ndim) { 261 case 1: 262 #if defined(PETSC_USE_COMPLEX) 263 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 264 #else 265 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 266 #endif 267 break; 268 case 2: 269 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 270 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271 #else 272 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 273 #endif 274 break; 275 case 3: 276 #if defined(PETSC_USE_COMPLEX) 277 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 278 #else 279 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 280 #endif 281 break; 282 default: 283 #if defined(PETSC_USE_COMPLEX) 284 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 285 #else 286 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 287 #endif 288 break; 289 } 290 fftw->binarray = x_array; 291 fftw->boutarray = y_array; 292 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 293 } else { /* use existing plan */ 294 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 295 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 296 } else { 297 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 298 } 299 } 300 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 301 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatDestroy_FFTW" 307 PetscErrorCode MatDestroy_FFTW(Mat A) 308 { 309 Mat_FFT *fft = (Mat_FFT*)A->data; 310 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 fftw_destroy_plan(fftw->p_forward); 315 fftw_destroy_plan(fftw->p_backward); 316 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 317 ierr = PetscFree(fft->data);CHKERRQ(ierr); 318 fftw_mpi_cleanup(); 319 PetscFunctionReturn(0); 320 } 321 322 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 323 #undef __FUNCT__ 324 #define __FUNCT__ "VecDestroy_MPIFFTW" 325 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 326 { 327 PetscErrorCode ierr; 328 PetscScalar *array; 329 330 PetscFunctionBegin; 331 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 332 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 333 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 334 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 #undef __FUNCT__ 339 #define __FUNCT__ "MatGetVecsFFTW" 340 /*@ 341 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 342 parallel layout determined by FFTW 343 344 Collective on Mat 345 346 Input Parameter: 347 . A - the matrix 348 349 Output Parameter: 350 + x - (optional) input vector of forward FFTW 351 - y - (optional) output vector of forward FFTW 352 - z - (optional) output vector of backward FFTW 353 354 Level: advanced 355 356 Note: The parallel layout of output of forward FFTW is always same as the input 357 of the backward FFTW. But parallel layout of the input vector of forward 358 FFTW might not be same as the output of backward FFTW. 359 Also note that we need to provide enough space while doing parallel real transform. 360 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 361 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 362 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 363 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 364 zeros if it is odd only one zero is needed. 365 Lastly one needs some scratch space at the end of data set in each process. alloc_local 366 figures out how much space is needed, i.e. it figures out the data+scratch space for 367 each processor and returns that. 368 369 .seealso: MatCreateFFTW() 370 @*/ 371 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 372 { 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 382 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 383 { 384 PetscErrorCode ierr; 385 PetscMPIInt size,rank; 386 MPI_Comm comm; 387 Mat_FFT *fft = (Mat_FFT*)A->data; 388 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 389 PetscInt N = fft->N; 390 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 391 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 394 PetscValidType(A,1); 395 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 396 397 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 398 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 399 if (size == 1) { /* sequential case */ 400 #if defined(PETSC_USE_COMPLEX) 401 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 402 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 403 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 404 #else 405 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 406 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 407 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 408 #endif 409 } else { /* parallel cases */ 410 ptrdiff_t alloc_local,local_n0,local_0_start; 411 ptrdiff_t local_n1; 412 fftw_complex *data_fout; 413 ptrdiff_t local_1_start; 414 #if defined(PETSC_USE_COMPLEX) 415 fftw_complex *data_fin,*data_bout; 416 #else 417 double *data_finr,*data_boutr; 418 PetscInt n1,N1; 419 ptrdiff_t temp; 420 #endif 421 422 switch (ndim) { 423 case 1: 424 #if !defined(PETSC_USE_COMPLEX) 425 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 426 #else 427 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 428 if (fin) { 429 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 430 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 431 432 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 433 } 434 if (fout) { 435 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 436 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 437 438 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 439 } 440 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 441 if (bout) { 442 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 443 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 444 445 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 446 } 447 break; 448 #endif 449 case 2: 450 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 451 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 452 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 453 if (fin) { 454 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 455 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 456 457 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 458 } 459 if (bout) { 460 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 461 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 462 463 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 464 } 465 if (fout) { 466 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 467 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 468 469 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 470 } 471 #else 472 /* Get local size */ 473 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 474 if (fin) { 475 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 476 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 477 478 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 479 } 480 if (fout) { 481 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 482 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 483 484 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 485 } 486 if (bout) { 487 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 488 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 489 490 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 491 } 492 #endif 493 break; 494 case 3: 495 #if !defined(PETSC_USE_COMPLEX) 496 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 497 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 498 if (fin) { 499 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 500 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 501 502 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 503 } 504 if (bout) { 505 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 506 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 507 508 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 509 } 510 511 if (fout) { 512 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 513 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 514 515 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 516 } 517 #else 518 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 519 if (fin) { 520 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 522 523 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 if (fout) { 526 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 527 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 528 529 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 530 } 531 if (bout) { 532 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 533 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 534 535 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 536 } 537 #endif 538 break; 539 default: 540 #if !defined(PETSC_USE_COMPLEX) 541 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 542 543 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 544 545 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 546 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 547 548 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 549 550 if (fin) { 551 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 552 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 553 554 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 555 } 556 if (bout) { 557 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 558 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 559 560 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 561 } 562 563 if (fout) { 564 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 565 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 566 567 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 568 } 569 #else 570 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 571 if (fin) { 572 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 573 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 574 575 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 576 } 577 if (fout) { 578 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 579 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 580 581 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 582 } 583 if (bout) { 584 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 585 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 586 587 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 588 } 589 #endif 590 break; 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "VecScatterPetscToFFTW" 598 /*@ 599 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 600 601 Collective on Mat 602 603 Input Parameters: 604 + A - FFTW matrix 605 - x - the PETSc vector 606 607 Output Parameters: 608 . y - the FFTW vector 609 610 Options Database Keys: 611 . -mat_fftw_plannerflags - set FFTW planner flags 612 613 Level: intermediate 614 615 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 616 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 617 zeros. This routine does that job by scattering operation. 618 619 .seealso: VecScatterFFTWToPetsc() 620 @*/ 621 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 622 { 623 PetscErrorCode ierr; 624 625 PetscFunctionBegin; 626 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 632 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 633 { 634 PetscErrorCode ierr; 635 MPI_Comm comm; 636 Mat_FFT *fft = (Mat_FFT*)A->data; 637 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 638 PetscInt N =fft->N; 639 PetscInt ndim =fft->ndim,*dim=fft->dim; 640 PetscInt low; 641 PetscInt rank,size,vsize,vsize1; 642 ptrdiff_t alloc_local,local_n0,local_0_start; 643 ptrdiff_t local_n1,local_1_start; 644 VecScatter vecscat; 645 IS list1,list2; 646 #if !defined(PETSC_USE_COMPLEX) 647 PetscInt i,j,k,partial_dim; 648 PetscInt *indx1, *indx2, tempindx, tempindx1; 649 PetscInt N1, n1,NM; 650 ptrdiff_t temp; 651 #endif 652 653 PetscFunctionBegin; 654 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 655 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 656 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 657 ierr = VecGetOwnershipRange(y,&low,NULL); 658 659 if (size==1) { 660 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 661 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 662 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 663 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 664 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 665 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 666 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 667 ierr = ISDestroy(&list1);CHKERRQ(ierr); 668 } else { 669 switch (ndim) { 670 case 1: 671 #if defined(PETSC_USE_COMPLEX) 672 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 673 674 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 675 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 676 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 677 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 679 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 680 ierr = ISDestroy(&list1);CHKERRQ(ierr); 681 ierr = ISDestroy(&list2);CHKERRQ(ierr); 682 #else 683 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 684 #endif 685 break; 686 case 2: 687 #if defined(PETSC_USE_COMPLEX) 688 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 689 690 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 691 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 692 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 693 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 694 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 696 ierr = ISDestroy(&list1);CHKERRQ(ierr); 697 ierr = ISDestroy(&list2);CHKERRQ(ierr); 698 #else 699 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 700 701 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 702 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 703 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 704 705 if (dim[1]%2==0) { 706 NM = dim[1]+2; 707 } else { 708 NM = dim[1]+1; 709 } 710 for (i=0; i<local_n0; i++) { 711 for (j=0; j<dim[1]; j++) { 712 tempindx = i*dim[1] + j; 713 tempindx1 = i*NM + j; 714 715 indx1[tempindx]=local_0_start*dim[1]+tempindx; 716 indx2[tempindx]=low+tempindx1; 717 } 718 } 719 720 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 721 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 722 723 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 724 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 727 ierr = ISDestroy(&list1);CHKERRQ(ierr); 728 ierr = ISDestroy(&list2);CHKERRQ(ierr); 729 ierr = PetscFree(indx1);CHKERRQ(ierr); 730 ierr = PetscFree(indx2);CHKERRQ(ierr); 731 #endif 732 break; 733 734 case 3: 735 #if defined(PETSC_USE_COMPLEX) 736 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 737 738 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 739 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 740 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 741 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 744 ierr = ISDestroy(&list1);CHKERRQ(ierr); 745 ierr = ISDestroy(&list2);CHKERRQ(ierr); 746 #else 747 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 748 749 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 750 751 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 752 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 753 754 if (dim[2]%2==0) NM = dim[2]+2; 755 else NM = dim[2]+1; 756 757 for (i=0; i<local_n0; i++) { 758 for (j=0; j<dim[1]; j++) { 759 for (k=0; k<dim[2]; k++) { 760 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 761 tempindx1 = i*dim[1]*NM + j*NM + k; 762 763 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 764 indx2[tempindx]=low+tempindx1; 765 } 766 } 767 } 768 769 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 770 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 771 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 772 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 773 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 775 ierr = ISDestroy(&list1);CHKERRQ(ierr); 776 ierr = ISDestroy(&list2);CHKERRQ(ierr); 777 ierr = PetscFree(indx1);CHKERRQ(ierr); 778 ierr = PetscFree(indx2);CHKERRQ(ierr); 779 #endif 780 break; 781 782 default: 783 #if defined(PETSC_USE_COMPLEX) 784 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 785 786 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 787 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 788 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 789 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 792 ierr = ISDestroy(&list1);CHKERRQ(ierr); 793 ierr = ISDestroy(&list2);CHKERRQ(ierr); 794 #else 795 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 796 797 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 798 799 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 800 801 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 802 803 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 804 805 partial_dim = fftw->partial_dim; 806 807 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 808 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 809 810 if (dim[ndim-1]%2==0) NM = 2; 811 else NM = 1; 812 813 j = low; 814 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 815 indx1[i] = local_0_start*partial_dim + i; 816 indx2[i] = j; 817 if (k%dim[ndim-1]==0) j+=NM; 818 j++; 819 } 820 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 821 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 822 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 823 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 826 ierr = ISDestroy(&list1);CHKERRQ(ierr); 827 ierr = ISDestroy(&list2);CHKERRQ(ierr); 828 ierr = PetscFree(indx1);CHKERRQ(ierr); 829 ierr = PetscFree(indx2);CHKERRQ(ierr); 830 #endif 831 break; 832 } 833 } 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "VecScatterFFTWToPetsc" 839 /*@ 840 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 841 842 Collective on Mat 843 844 Input Parameters: 845 + A - FFTW matrix 846 - x - FFTW vector 847 848 Output Parameters: 849 . y - PETSc vector 850 851 Level: intermediate 852 853 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 854 VecScatterFFTWToPetsc removes those extra zeros. 855 856 .seealso: VecScatterPetscToFFTW() 857 @*/ 858 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 859 { 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 869 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 870 { 871 PetscErrorCode ierr; 872 MPI_Comm comm; 873 Mat_FFT *fft = (Mat_FFT*)A->data; 874 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 875 PetscInt N = fft->N; 876 PetscInt ndim = fft->ndim,*dim=fft->dim; 877 PetscInt low; 878 PetscInt rank,size; 879 ptrdiff_t alloc_local,local_n0,local_0_start; 880 ptrdiff_t local_n1,local_1_start; 881 VecScatter vecscat; 882 IS list1,list2; 883 #if !defined(PETSC_USE_COMPLEX) 884 PetscInt i,j,k,partial_dim; 885 PetscInt *indx1, *indx2, tempindx, tempindx1; 886 PetscInt N1, n1,NM; 887 ptrdiff_t temp; 888 #endif 889 890 PetscFunctionBegin; 891 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 892 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 893 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 894 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 895 896 if (size==1) { 897 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 898 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 899 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 902 ierr = ISDestroy(&list1);CHKERRQ(ierr); 903 904 } else { 905 switch (ndim) { 906 case 1: 907 #if defined(PETSC_USE_COMPLEX) 908 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 909 910 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 911 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 912 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 913 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 916 ierr = ISDestroy(&list1);CHKERRQ(ierr); 917 ierr = ISDestroy(&list2);CHKERRQ(ierr); 918 #else 919 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 920 #endif 921 break; 922 case 2: 923 #if defined(PETSC_USE_COMPLEX) 924 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 925 926 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 927 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 928 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 929 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 932 ierr = ISDestroy(&list1);CHKERRQ(ierr); 933 ierr = ISDestroy(&list2);CHKERRQ(ierr); 934 #else 935 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 936 937 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 938 939 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 940 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 941 942 if (dim[1]%2==0) NM = dim[1]+2; 943 else NM = dim[1]+1; 944 945 for (i=0; i<local_n0; i++) { 946 for (j=0; j<dim[1]; j++) { 947 tempindx = i*dim[1] + j; 948 tempindx1 = i*NM + j; 949 950 indx1[tempindx]=local_0_start*dim[1]+tempindx; 951 indx2[tempindx]=low+tempindx1; 952 } 953 } 954 955 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 956 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 957 958 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 959 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 962 ierr = ISDestroy(&list1);CHKERRQ(ierr); 963 ierr = ISDestroy(&list2);CHKERRQ(ierr); 964 ierr = PetscFree(indx1);CHKERRQ(ierr); 965 ierr = PetscFree(indx2);CHKERRQ(ierr); 966 #endif 967 break; 968 case 3: 969 #if defined(PETSC_USE_COMPLEX) 970 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 971 972 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 973 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 974 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 975 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 978 ierr = ISDestroy(&list1);CHKERRQ(ierr); 979 ierr = ISDestroy(&list2);CHKERRQ(ierr); 980 #else 981 982 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 983 984 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 985 986 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 987 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 988 989 if (dim[2]%2==0) NM = dim[2]+2; 990 else NM = dim[2]+1; 991 992 for (i=0; i<local_n0; i++) { 993 for (j=0; j<dim[1]; j++) { 994 for (k=0; k<dim[2]; k++) { 995 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 996 tempindx1 = i*dim[1]*NM + j*NM + k; 997 998 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 999 indx2[tempindx]=low+tempindx1; 1000 } 1001 } 1002 } 1003 1004 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1005 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1006 1007 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1008 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1011 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1012 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1013 ierr = PetscFree(indx1);CHKERRQ(ierr); 1014 ierr = PetscFree(indx2);CHKERRQ(ierr); 1015 #endif 1016 break; 1017 default: 1018 #if defined(PETSC_USE_COMPLEX) 1019 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1020 1021 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1022 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1023 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1024 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1027 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1028 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1029 #else 1030 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1031 1032 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1033 1034 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1035 1036 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1037 1038 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1039 1040 partial_dim = fftw->partial_dim; 1041 1042 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1043 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1044 1045 if (dim[ndim-1]%2==0) NM = 2; 1046 else NM = 1; 1047 1048 j = low; 1049 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1050 indx1[i] = local_0_start*partial_dim + i; 1051 indx2[i] = j; 1052 if (k%dim[ndim-1]==0) j+=NM; 1053 j++; 1054 } 1055 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1056 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1057 1058 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1059 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1062 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1063 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1064 ierr = PetscFree(indx1);CHKERRQ(ierr); 1065 ierr = PetscFree(indx2);CHKERRQ(ierr); 1066 #endif 1067 break; 1068 } 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatCreate_FFTW" 1075 /* 1076 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1077 1078 Options Database Keys: 1079 + -mat_fftw_plannerflags - set FFTW planner flags 1080 1081 Level: intermediate 1082 1083 */ 1084 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1085 { 1086 PetscErrorCode ierr; 1087 MPI_Comm comm; 1088 Mat_FFT *fft=(Mat_FFT*)A->data; 1089 Mat_FFTW *fftw; 1090 PetscInt n =fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1091 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1092 PetscBool flg; 1093 PetscInt p_flag,partial_dim=1,ctr; 1094 PetscMPIInt size,rank; 1095 ptrdiff_t *pdim; 1096 ptrdiff_t local_n1,local_1_start; 1097 #if !defined(PETSC_USE_COMPLEX) 1098 ptrdiff_t temp; 1099 PetscInt N1,tot_dim; 1100 #else 1101 PetscInt n1; 1102 #endif 1103 1104 PetscFunctionBegin; 1105 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1106 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1107 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1108 1109 fftw_mpi_init(); 1110 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1111 pdim[0] = dim[0]; 1112 #if !defined(PETSC_USE_COMPLEX) 1113 tot_dim = 2*dim[0]; 1114 #endif 1115 for (ctr=1; ctr<ndim; ctr++) { 1116 partial_dim *= dim[ctr]; 1117 pdim[ctr] = dim[ctr]; 1118 #if !defined(PETSC_USE_COMPLEX) 1119 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1120 else tot_dim *= dim[ctr]; 1121 #endif 1122 } 1123 1124 if (size == 1) { 1125 #if defined(PETSC_USE_COMPLEX) 1126 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1127 n = N; 1128 #else 1129 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1130 n = tot_dim; 1131 #endif 1132 1133 } else { 1134 ptrdiff_t alloc_local,local_n0,local_0_start; 1135 switch (ndim) { 1136 case 1: 1137 #if !defined(PETSC_USE_COMPLEX) 1138 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1139 #else 1140 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1141 1142 n = (PetscInt)local_n0; 1143 n1 = (PetscInt)local_n1; 1144 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1145 #endif 1146 break; 1147 case 2: 1148 #if defined(PETSC_USE_COMPLEX) 1149 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1150 /* 1151 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1152 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1153 */ 1154 n = (PetscInt)local_n0*dim[1]; 1155 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1156 #else 1157 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1158 1159 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1160 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1161 #endif 1162 break; 1163 case 3: 1164 #if defined(PETSC_USE_COMPLEX) 1165 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1166 1167 n = (PetscInt)local_n0*dim[1]*dim[2]; 1168 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1169 #else 1170 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1171 1172 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1173 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1174 #endif 1175 break; 1176 default: 1177 #if defined(PETSC_USE_COMPLEX) 1178 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1179 1180 n = (PetscInt)local_n0*partial_dim; 1181 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1182 #else 1183 temp = pdim[ndim-1]; 1184 1185 pdim[ndim-1] = temp/2 + 1; 1186 1187 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1188 1189 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1190 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1191 1192 pdim[ndim-1] = temp; 1193 1194 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1195 #endif 1196 break; 1197 } 1198 } 1199 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1200 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1201 fft->data = (void*)fftw; 1202 1203 fft->n = n; 1204 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1205 fftw->partial_dim = partial_dim; 1206 1207 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));CHKERRQ(ierr); 1208 1209 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1210 1211 fftw->p_forward = 0; 1212 fftw->p_backward = 0; 1213 fftw->p_flag = FFTW_ESTIMATE; 1214 1215 if (size == 1) { 1216 A->ops->mult = MatMult_SeqFFTW; 1217 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1218 } else { 1219 A->ops->mult = MatMult_MPIFFTW; 1220 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1221 } 1222 fft->matdestroy = MatDestroy_FFTW; 1223 A->assembled = PETSC_TRUE; 1224 A->preallocated = PETSC_TRUE; 1225 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1229 1230 /* get runtime options */ 1231 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1232 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1233 if (flg) fftw->p_flag = (unsigned)p_flag; 1234 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 1239 1240 1241