1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 Input parameter: 30 A - the matrix 31 x - the vector on which FDFT will be performed 32 33 Output parameter: 34 y - vector that stores result of FDFT 35 */ 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMult_SeqFFTW" 38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39 { 40 PetscErrorCode ierr; 41 Mat_FFT *fft = (Mat_FFT*)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43 PetscScalar *x_array,*y_array; 44 PetscInt ndim = fft->ndim,*dim = fft->dim; 45 int *dim_32,i; 46 47 PetscFunctionBegin; 48 ierr = PetscMalloc(ndim*sizeof(int),&dim_32); 49 for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i]; 50 51 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 52 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 53 if (!fftw->p_forward) { /* create a plan, then excute it */ 54 switch (ndim) { 55 case 1: 56 #if defined(PETSC_USE_COMPLEX) 57 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58 #else 59 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 60 #endif 61 break; 62 case 2: 63 #if defined(PETSC_USE_COMPLEX) 64 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 65 #else 66 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 67 #endif 68 break; 69 case 3: 70 #if defined(PETSC_USE_COMPLEX) 71 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 72 //fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,FFTW_MEASURE); 73 #else 74 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 75 #endif 76 break; 77 default: 78 #if defined(PETSC_USE_COMPLEX) 79 fftw->p_forward = fftw_plan_dft((int)ndim,(int*)dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 80 /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */ 81 #else 82 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 83 #endif 84 break; 85 } 86 fftw->finarray = x_array; 87 fftw->foutarray = y_array; 88 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 89 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 90 fftw_execute(fftw->p_forward); 91 } else { /* use existing plan */ 92 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 93 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 94 } else { 95 fftw_execute(fftw->p_forward); 96 } 97 } 98 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 99 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 100 ierr = PetscFree(dim_32);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 /* MatMultTranspose_SeqFFTW performs serial backward DFT 105 Input parameter: 106 A - the matrix 107 x - the vector on which BDFT will be performed 108 109 Output parameter: 110 y - vector that stores result of BDFT 111 */ 112 113 #undef __FUNCT__ 114 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 115 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 116 { 117 PetscErrorCode ierr; 118 Mat_FFT *fft = (Mat_FFT*)A->data; 119 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 120 PetscScalar *x_array,*y_array; 121 PetscInt ndim=fft->ndim,*dim=fft->dim; 122 int *dim_32,i; /* for 64-bit */ 123 124 PetscFunctionBegin; 125 ierr = PetscMalloc(ndim*sizeof(int),&dim_32); 126 for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i]; 127 128 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 129 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 130 if (!fftw->p_backward) { /* create a plan, then excute it */ 131 switch (ndim) { 132 case 1: 133 #if defined(PETSC_USE_COMPLEX) 134 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 135 #else 136 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 137 #endif 138 break; 139 case 2: 140 #if defined(PETSC_USE_COMPLEX) 141 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 142 #else 143 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 144 #endif 145 break; 146 case 3: 147 #if defined(PETSC_USE_COMPLEX) 148 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 149 //fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,FFTW_MEASURE); 150 #else 151 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 152 #endif 153 break; 154 default: 155 #if defined(PETSC_USE_COMPLEX) 156 fftw->p_backward = fftw_plan_dft((int)ndim,dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 157 #else 158 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 159 #endif 160 break; 161 } 162 fftw->binarray = x_array; 163 fftw->boutarray = y_array; 164 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 165 } else { /* use existing plan */ 166 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 167 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 168 } else { 169 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 170 } 171 } 172 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 173 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 174 ierr = PetscFree(dim_32);CHKERRQ(ierr); 175 PetscFunctionReturn(0); 176 } 177 178 /* MatMult_MPIFFTW performs forward DFT in parallel 179 Input parameter: 180 A - the matrix 181 x - the vector on which FDFT will be performed 182 183 Output parameter: 184 y - vector that stores result of FDFT 185 */ 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMult_MPIFFTW" 188 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 189 { 190 PetscErrorCode ierr; 191 Mat_FFT *fft = (Mat_FFT*)A->data; 192 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 193 PetscScalar *x_array,*y_array; 194 PetscInt ndim=fft->ndim,*dim=fft->dim; 195 MPI_Comm comm; 196 197 PetscFunctionBegin; 198 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 199 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 200 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 201 if (!fftw->p_forward) { /* create a plan, then excute it */ 202 switch (ndim) { 203 case 1: 204 #if defined(PETSC_USE_COMPLEX) 205 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 208 #endif 209 break; 210 case 2: 211 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 212 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 case 3: 218 #if defined(PETSC_USE_COMPLEX) 219 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 220 #else 221 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 222 #endif 223 break; 224 default: 225 #if defined(PETSC_USE_COMPLEX) 226 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 227 #else 228 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 229 #endif 230 break; 231 } 232 fftw->finarray = x_array; 233 fftw->foutarray = y_array; 234 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 235 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 236 fftw_execute(fftw->p_forward); 237 } else { /* use existing plan */ 238 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 239 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 240 } else { 241 fftw_execute(fftw->p_forward); 242 } 243 } 244 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 245 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 250 Input parameter: 251 A - the matrix 252 x - the vector on which BDFT will be performed 253 254 Output parameter: 255 y - vector that stores result of BDFT 256 */ 257 #undef __FUNCT__ 258 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 259 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 260 { 261 PetscErrorCode ierr; 262 Mat_FFT *fft = (Mat_FFT*)A->data; 263 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 264 PetscScalar *x_array,*y_array; 265 PetscInt ndim=fft->ndim,*dim=fft->dim; 266 MPI_Comm comm; 267 268 PetscFunctionBegin; 269 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 270 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 271 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 272 if (!fftw->p_backward) { /* create a plan, then excute it */ 273 switch (ndim) { 274 case 1: 275 #if defined(PETSC_USE_COMPLEX) 276 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 277 #else 278 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 279 #endif 280 break; 281 case 2: 282 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 283 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 284 #else 285 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 286 #endif 287 break; 288 case 3: 289 #if defined(PETSC_USE_COMPLEX) 290 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 291 #else 292 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 293 #endif 294 break; 295 default: 296 #if defined(PETSC_USE_COMPLEX) 297 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 298 #else 299 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 300 #endif 301 break; 302 } 303 fftw->binarray = x_array; 304 fftw->boutarray = y_array; 305 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 306 } else { /* use existing plan */ 307 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 308 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 309 } else { 310 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 311 } 312 } 313 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 314 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatDestroy_FFTW" 320 PetscErrorCode MatDestroy_FFTW(Mat A) 321 { 322 Mat_FFT *fft = (Mat_FFT*)A->data; 323 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 324 PetscErrorCode ierr; 325 326 PetscFunctionBegin; 327 fftw_destroy_plan(fftw->p_forward); 328 fftw_destroy_plan(fftw->p_backward); 329 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 330 ierr = PetscFree(fft->data);CHKERRQ(ierr); 331 fftw_mpi_cleanup(); 332 PetscFunctionReturn(0); 333 } 334 335 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 336 #undef __FUNCT__ 337 #define __FUNCT__ "VecDestroy_MPIFFTW" 338 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 339 { 340 PetscErrorCode ierr; 341 PetscScalar *array; 342 343 PetscFunctionBegin; 344 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 345 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 346 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 347 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatGetVecsFFTW" 353 /*@ 354 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 355 parallel layout determined by FFTW 356 357 Collective on Mat 358 359 Input Parameter: 360 . A - the matrix 361 362 Output Parameter: 363 + x - (optional) input vector of forward FFTW 364 - y - (optional) output vector of forward FFTW 365 - z - (optional) output vector of backward FFTW 366 367 Level: advanced 368 369 Note: The parallel layout of output of forward FFTW is always same as the input 370 of the backward FFTW. But parallel layout of the input vector of forward 371 FFTW might not be same as the output of backward FFTW. 372 Also note that we need to provide enough space while doing parallel real transform. 373 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 374 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 375 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 376 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 377 zeros if it is odd only one zero is needed. 378 Lastly one needs some scratch space at the end of data set in each process. alloc_local 379 figures out how much space is needed, i.e. it figures out the data+scratch space for 380 each processor and returns that. 381 382 .seealso: MatCreateFFTW() 383 @*/ 384 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 385 { 386 PetscErrorCode ierr; 387 388 PetscFunctionBegin; 389 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 395 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 396 { 397 PetscErrorCode ierr; 398 PetscMPIInt size,rank; 399 MPI_Comm comm; 400 Mat_FFT *fft = (Mat_FFT*)A->data; 401 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 402 PetscInt N = fft->N; 403 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 404 405 PetscFunctionBegin; 406 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 407 PetscValidType(A,1); 408 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 409 410 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 411 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 412 if (size == 1) { /* sequential case */ 413 #if defined(PETSC_USE_COMPLEX) 414 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 415 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 416 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 417 #else 418 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 419 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 420 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 421 #endif 422 } else { /* parallel cases */ 423 ptrdiff_t alloc_local,local_n0,local_0_start; 424 ptrdiff_t local_n1; 425 fftw_complex *data_fout; 426 ptrdiff_t local_1_start; 427 #if defined(PETSC_USE_COMPLEX) 428 fftw_complex *data_fin,*data_bout; 429 #else 430 double *data_finr,*data_boutr; 431 PetscInt n1,N1; 432 ptrdiff_t temp; 433 #endif 434 435 switch (ndim) { 436 case 1: 437 #if !defined(PETSC_USE_COMPLEX) 438 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 439 #else 440 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 441 if (fin) { 442 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 443 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 444 445 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 446 } 447 if (fout) { 448 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 449 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 450 451 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 452 } 453 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 454 if (bout) { 455 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 456 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 457 458 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 459 } 460 break; 461 #endif 462 case 2: 463 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 464 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 465 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 466 if (fin) { 467 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 468 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 469 470 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 471 } 472 if (bout) { 473 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 474 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 475 476 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 477 } 478 if (fout) { 479 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 480 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 481 482 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 483 } 484 #else 485 /* Get local size */ 486 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 487 if (fin) { 488 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 489 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 490 491 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 492 } 493 if (fout) { 494 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 495 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 496 497 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 498 } 499 if (bout) { 500 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 501 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 502 503 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 504 } 505 #endif 506 break; 507 case 3: 508 #if !defined(PETSC_USE_COMPLEX) 509 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 510 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 511 if (fin) { 512 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 513 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 514 515 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 516 } 517 if (bout) { 518 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 519 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 520 521 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 522 } 523 524 if (fout) { 525 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 527 528 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 529 } 530 #else 531 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 532 if (fin) { 533 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 534 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 535 536 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 537 } 538 if (fout) { 539 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 540 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 541 542 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 543 } 544 if (bout) { 545 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 546 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 547 548 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 549 } 550 #endif 551 break; 552 default: 553 #if !defined(PETSC_USE_COMPLEX) 554 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 555 556 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 557 558 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 559 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 560 561 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 562 563 if (fin) { 564 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 565 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 566 567 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 568 } 569 if (bout) { 570 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 571 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 572 573 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 574 } 575 576 if (fout) { 577 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 578 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 579 580 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 581 } 582 #else 583 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 584 if (fin) { 585 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 587 588 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 589 } 590 if (fout) { 591 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 592 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 593 594 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 595 } 596 if (bout) { 597 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 598 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 599 600 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 601 } 602 #endif 603 break; 604 } 605 } 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNCT__ 610 #define __FUNCT__ "VecScatterPetscToFFTW" 611 /*@ 612 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 613 614 Collective on Mat 615 616 Input Parameters: 617 + A - FFTW matrix 618 - x - the PETSc vector 619 620 Output Parameters: 621 . y - the FFTW vector 622 623 Options Database Keys: 624 . -mat_fftw_plannerflags - set FFTW planner flags 625 626 Level: intermediate 627 628 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 629 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 630 zeros. This routine does that job by scattering operation. 631 632 .seealso: VecScatterFFTWToPetsc() 633 @*/ 634 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 635 { 636 PetscErrorCode ierr; 637 638 PetscFunctionBegin; 639 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 645 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 646 { 647 PetscErrorCode ierr; 648 MPI_Comm comm; 649 Mat_FFT *fft = (Mat_FFT*)A->data; 650 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 651 PetscInt N =fft->N; 652 PetscInt ndim =fft->ndim,*dim=fft->dim; 653 PetscInt low; 654 PetscMPIInt rank,size; 655 PetscInt vsize,vsize1; 656 //ptrdiff_t alloc_local,local_n0,local_0_start; 657 ptrdiff_t local_n0,local_0_start; 658 ptrdiff_t local_n1,local_1_start; 659 VecScatter vecscat; 660 IS list1,list2; 661 #if !defined(PETSC_USE_COMPLEX) 662 PetscInt i,j,k,partial_dim; 663 PetscInt *indx1, *indx2, tempindx, tempindx1; 664 PetscInt N1, n1,NM; 665 ptrdiff_t temp; 666 #endif 667 668 PetscFunctionBegin; 669 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 670 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 671 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 672 ierr = VecGetOwnershipRange(y,&low,NULL); 673 674 if (size==1) { 675 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 676 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 677 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 678 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 679 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 680 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 681 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 682 ierr = ISDestroy(&list1);CHKERRQ(ierr); 683 } else { 684 switch (ndim) { 685 case 1: 686 #if defined(PETSC_USE_COMPLEX) 687 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 688 689 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 690 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 691 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 692 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 693 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 694 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 695 ierr = ISDestroy(&list1);CHKERRQ(ierr); 696 ierr = ISDestroy(&list2);CHKERRQ(ierr); 697 #else 698 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 699 #endif 700 break; 701 case 2: 702 #if defined(PETSC_USE_COMPLEX) 703 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 704 705 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 706 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 707 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 708 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 709 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 710 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 711 ierr = ISDestroy(&list1);CHKERRQ(ierr); 712 ierr = ISDestroy(&list2);CHKERRQ(ierr); 713 #else 714 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 715 716 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 717 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 718 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 719 720 if (dim[1]%2==0) { 721 NM = dim[1]+2; 722 } else { 723 NM = dim[1]+1; 724 } 725 for (i=0; i<local_n0; i++) { 726 for (j=0; j<dim[1]; j++) { 727 tempindx = i*dim[1] + j; 728 tempindx1 = i*NM + j; 729 730 indx1[tempindx]=local_0_start*dim[1]+tempindx; 731 indx2[tempindx]=low+tempindx1; 732 } 733 } 734 735 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 736 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 737 738 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 739 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 741 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 742 ierr = ISDestroy(&list1);CHKERRQ(ierr); 743 ierr = ISDestroy(&list2);CHKERRQ(ierr); 744 ierr = PetscFree(indx1);CHKERRQ(ierr); 745 ierr = PetscFree(indx2);CHKERRQ(ierr); 746 #endif 747 break; 748 749 case 3: 750 #if defined(PETSC_USE_COMPLEX) 751 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 752 753 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 754 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 755 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 756 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 757 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 758 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 759 ierr = ISDestroy(&list1);CHKERRQ(ierr); 760 ierr = ISDestroy(&list2);CHKERRQ(ierr); 761 #else 762 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 763 764 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 765 766 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 767 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 768 769 if (dim[2]%2==0) NM = dim[2]+2; 770 else NM = dim[2]+1; 771 772 for (i=0; i<local_n0; i++) { 773 for (j=0; j<dim[1]; j++) { 774 for (k=0; k<dim[2]; k++) { 775 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 776 tempindx1 = i*dim[1]*NM + j*NM + k; 777 778 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 779 indx2[tempindx]=low+tempindx1; 780 } 781 } 782 } 783 784 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 785 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 786 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 787 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 790 ierr = ISDestroy(&list1);CHKERRQ(ierr); 791 ierr = ISDestroy(&list2);CHKERRQ(ierr); 792 ierr = PetscFree(indx1);CHKERRQ(ierr); 793 ierr = PetscFree(indx2);CHKERRQ(ierr); 794 #endif 795 break; 796 797 default: 798 #if defined(PETSC_USE_COMPLEX) 799 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 800 801 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 802 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 803 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 804 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 805 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 807 ierr = ISDestroy(&list1);CHKERRQ(ierr); 808 ierr = ISDestroy(&list2);CHKERRQ(ierr); 809 #else 810 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 811 812 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 813 814 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 815 816 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 817 818 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 819 820 partial_dim = fftw->partial_dim; 821 822 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 823 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 824 825 if (dim[ndim-1]%2==0) NM = 2; 826 else NM = 1; 827 828 j = low; 829 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 830 indx1[i] = local_0_start*partial_dim + i; 831 indx2[i] = j; 832 if (k%dim[ndim-1]==0) j+=NM; 833 j++; 834 } 835 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 836 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 837 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 838 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 839 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 840 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 841 ierr = ISDestroy(&list1);CHKERRQ(ierr); 842 ierr = ISDestroy(&list2);CHKERRQ(ierr); 843 ierr = PetscFree(indx1);CHKERRQ(ierr); 844 ierr = PetscFree(indx2);CHKERRQ(ierr); 845 #endif 846 break; 847 } 848 } 849 PetscFunctionReturn(0); 850 } 851 852 #undef __FUNCT__ 853 #define __FUNCT__ "VecScatterFFTWToPetsc" 854 /*@ 855 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 856 857 Collective on Mat 858 859 Input Parameters: 860 + A - FFTW matrix 861 - x - FFTW vector 862 863 Output Parameters: 864 . y - PETSc vector 865 866 Level: intermediate 867 868 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 869 VecScatterFFTWToPetsc removes those extra zeros. 870 871 .seealso: VecScatterPetscToFFTW() 872 @*/ 873 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 874 { 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 884 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 885 { 886 PetscErrorCode ierr; 887 MPI_Comm comm; 888 Mat_FFT *fft = (Mat_FFT*)A->data; 889 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 890 PetscInt N = fft->N; 891 PetscInt ndim = fft->ndim,*dim=fft->dim; 892 PetscInt low; 893 PetscMPIInt rank,size; 894 ptrdiff_t local_n0,local_0_start; 895 ptrdiff_t local_n1,local_1_start; 896 VecScatter vecscat; 897 IS list1,list2; 898 #if !defined(PETSC_USE_COMPLEX) 899 PetscInt i,j,k,partial_dim; 900 PetscInt *indx1, *indx2, tempindx, tempindx1; 901 PetscInt N1, n1,NM; 902 ptrdiff_t temp; 903 #endif 904 905 PetscFunctionBegin; 906 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 907 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 908 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 909 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 910 911 if (size==1) { 912 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 913 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 914 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 917 ierr = ISDestroy(&list1);CHKERRQ(ierr); 918 919 } else { 920 switch (ndim) { 921 case 1: 922 #if defined(PETSC_USE_COMPLEX) 923 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 924 925 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 926 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 927 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 928 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 931 ierr = ISDestroy(&list1);CHKERRQ(ierr); 932 ierr = ISDestroy(&list2);CHKERRQ(ierr); 933 #else 934 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 935 #endif 936 break; 937 case 2: 938 #if defined(PETSC_USE_COMPLEX) 939 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 940 941 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 942 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 943 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 944 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 947 ierr = ISDestroy(&list1);CHKERRQ(ierr); 948 ierr = ISDestroy(&list2);CHKERRQ(ierr); 949 #else 950 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 951 952 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 953 954 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 955 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 956 957 if (dim[1]%2==0) NM = dim[1]+2; 958 else NM = dim[1]+1; 959 960 for (i=0; i<local_n0; i++) { 961 for (j=0; j<dim[1]; j++) { 962 tempindx = i*dim[1] + j; 963 tempindx1 = i*NM + j; 964 965 indx1[tempindx]=local_0_start*dim[1]+tempindx; 966 indx2[tempindx]=low+tempindx1; 967 } 968 } 969 970 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 971 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 972 973 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 974 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977 ierr = ISDestroy(&list1);CHKERRQ(ierr); 978 ierr = ISDestroy(&list2);CHKERRQ(ierr); 979 ierr = PetscFree(indx1);CHKERRQ(ierr); 980 ierr = PetscFree(indx2);CHKERRQ(ierr); 981 #endif 982 break; 983 case 3: 984 #if defined(PETSC_USE_COMPLEX) 985 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 986 987 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 988 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 989 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 990 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993 ierr = ISDestroy(&list1);CHKERRQ(ierr); 994 ierr = ISDestroy(&list2);CHKERRQ(ierr); 995 #else 996 997 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 998 999 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1000 1001 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1002 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1003 1004 if (dim[2]%2==0) NM = dim[2]+2; 1005 else NM = dim[2]+1; 1006 1007 for (i=0; i<local_n0; i++) { 1008 for (j=0; j<dim[1]; j++) { 1009 for (k=0; k<dim[2]; k++) { 1010 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1011 tempindx1 = i*dim[1]*NM + j*NM + k; 1012 1013 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1014 indx2[tempindx]=low+tempindx1; 1015 } 1016 } 1017 } 1018 1019 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1020 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1021 1022 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1023 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1024 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1026 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1027 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1028 ierr = PetscFree(indx1);CHKERRQ(ierr); 1029 ierr = PetscFree(indx2);CHKERRQ(ierr); 1030 #endif 1031 break; 1032 default: 1033 #if defined(PETSC_USE_COMPLEX) 1034 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1035 1036 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1037 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1038 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1039 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1042 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1043 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1044 #else 1045 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1046 1047 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1048 1049 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1050 1051 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1052 1053 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1054 1055 partial_dim = fftw->partial_dim; 1056 1057 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1058 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1059 1060 if (dim[ndim-1]%2==0) NM = 2; 1061 else NM = 1; 1062 1063 j = low; 1064 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1065 indx1[i] = local_0_start*partial_dim + i; 1066 indx2[i] = j; 1067 if (k%dim[ndim-1]==0) j+=NM; 1068 j++; 1069 } 1070 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1071 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1072 1073 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1074 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1075 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1076 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1077 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1078 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1079 ierr = PetscFree(indx1);CHKERRQ(ierr); 1080 ierr = PetscFree(indx2);CHKERRQ(ierr); 1081 #endif 1082 break; 1083 } 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatCreate_FFTW" 1090 /* 1091 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1092 1093 Options Database Keys: 1094 + -mat_fftw_plannerflags - set FFTW planner flags 1095 1096 Level: intermediate 1097 1098 */ 1099 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1100 { 1101 PetscErrorCode ierr; 1102 MPI_Comm comm; 1103 Mat_FFT *fft=(Mat_FFT*)A->data; 1104 Mat_FFTW *fftw; 1105 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1106 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1107 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1108 PetscBool flg; 1109 PetscInt p_flag,partial_dim=1,ctr; 1110 PetscMPIInt size,rank; 1111 ptrdiff_t *pdim; 1112 ptrdiff_t local_n1,local_1_start; 1113 #if !defined(PETSC_USE_COMPLEX) 1114 ptrdiff_t temp; 1115 PetscInt N1,tot_dim; 1116 #else 1117 PetscInt n1; 1118 #endif 1119 1120 PetscFunctionBegin; 1121 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1122 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1123 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1124 1125 fftw_mpi_init(); 1126 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1127 pdim[0] = dim[0]; 1128 #if !defined(PETSC_USE_COMPLEX) 1129 tot_dim = 2*dim[0]; 1130 #endif 1131 for (ctr=1; ctr<ndim; ctr++) { 1132 partial_dim *= dim[ctr]; 1133 pdim[ctr] = dim[ctr]; 1134 #if !defined(PETSC_USE_COMPLEX) 1135 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1136 else tot_dim *= dim[ctr]; 1137 #endif 1138 } 1139 1140 if (size == 1) { 1141 #if defined(PETSC_USE_COMPLEX) 1142 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1143 n = N; 1144 #else 1145 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1146 n = tot_dim; 1147 #endif 1148 1149 } else { 1150 ptrdiff_t local_n0,local_0_start; 1151 switch (ndim) { 1152 case 1: 1153 #if !defined(PETSC_USE_COMPLEX) 1154 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1155 #else 1156 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1157 1158 n = (PetscInt)local_n0; 1159 n1 = (PetscInt)local_n1; 1160 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1161 #endif 1162 break; 1163 case 2: 1164 #if defined(PETSC_USE_COMPLEX) 1165 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1166 /* 1167 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1168 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1169 */ 1170 n = (PetscInt)local_n0*dim[1]; 1171 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1172 #else 1173 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1174 1175 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1176 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1177 #endif 1178 break; 1179 case 3: 1180 #if defined(PETSC_USE_COMPLEX) 1181 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1182 1183 n = (PetscInt)local_n0*dim[1]*dim[2]; 1184 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1185 #else 1186 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1187 1188 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1189 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1190 #endif 1191 break; 1192 default: 1193 #if defined(PETSC_USE_COMPLEX) 1194 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1195 1196 n = (PetscInt)local_n0*partial_dim; 1197 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1198 #else 1199 temp = pdim[ndim-1]; 1200 1201 pdim[ndim-1] = temp/2 + 1; 1202 1203 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1204 1205 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1206 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1207 1208 pdim[ndim-1] = temp; 1209 1210 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1211 #endif 1212 break; 1213 } 1214 } 1215 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1216 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1217 fft->data = (void*)fftw; 1218 1219 fft->n = n; 1220 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1221 fftw->partial_dim = partial_dim; 1222 1223 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1224 1225 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1226 1227 fftw->p_forward = 0; 1228 fftw->p_backward = 0; 1229 fftw->p_flag = FFTW_ESTIMATE; 1230 1231 if (size == 1) { 1232 A->ops->mult = MatMult_SeqFFTW; 1233 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1234 } else { 1235 A->ops->mult = MatMult_MPIFFTW; 1236 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1237 } 1238 fft->matdestroy = MatDestroy_FFTW; 1239 A->assembled = PETSC_TRUE; 1240 A->preallocated = PETSC_TRUE; 1241 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1245 1246 /* get runtime options */ 1247 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1248 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1249 if (flg) { 1250 fftw->p_flag = iplans[p_flag]; 1251 } 1252 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 1257 1258 1259