xref: /petsc/src/sys/fileio/fretrieve.c (revision 174dc0c8cee294b82b85e4dd3b331b29396264fc)
1 /*
2       Code for opening and closing files.
3 */
4 #include <petscsys.h>
5 #if defined(PETSC_HAVE_PWD_H)
6   #include <pwd.h>
7 #endif
8 #include <ctype.h>
9 #include <sys/stat.h>
10 #if defined(PETSC_HAVE_UNISTD_H)
11   #include <unistd.h>
12 #endif
13 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
14   #include <sys/utsname.h>
15 #endif
16 #include <fcntl.h>
17 #include <time.h>
18 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
19   #include <sys/systeminfo.h>
20 #endif
21 #include <petsc/private/petscimpl.h>
22 
23 /*
24    Private routine to delete tmp/shared storage
25 
26    This is called by MPI, not by users, when communicator attributes are deleted
27 
28 */
29 static PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm, PetscMPIInt keyval, void *count_val, void *extra_state)
30 {
31   PetscFunctionBegin;
32   PetscCallReturnMPI(PetscInfo(NULL, "Deleting tmp/shared data in an MPI_Comm %ld\n", (long)comm));
33   PetscCallReturnMPI(PetscFree(count_val));
34   PetscFunctionReturn(MPI_SUCCESS);
35 }
36 
37 // "Unknown section 'Environmental Variables'"
38 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
39 /*@C
40   PetscGetTmp - Gets the name of the "tmp" directory, often this is `/tmp`
41 
42   Collective
43 
44   Input Parameters:
45 + comm - MPI_Communicator that may share tmp
46 - len  - length of string to hold name
47 
48   Output Parameter:
49 . dir - directory name
50 
51   Options Database Keys:
52 + -shared_tmp     - indicates the directory is known to be shared among the MPI processes
53 . -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
54 - -tmp tmpdir     - name of the directory you wish to use as tmp
55 
56   Environmental Variables:
57 + `PETSC_SHARED_TMP`     - indicates the directory is known to be shared among the MPI processes
58 . `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
59 - `PETSC_TMP`            - name of the directory you wish to use as tmp
60 
61   Level: developer
62 
63 .seealso: `PetscSharedTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
64 @*/
65 PetscErrorCode PetscGetTmp(MPI_Comm comm, char dir[], size_t len)
66 {
67   PetscBool flg;
68 
69   PetscFunctionBegin;
70   PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", dir, len, &flg));
71   if (!flg) PetscCall(PetscStrncpy(dir, "/tmp", len));
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
74 
75 // "Unknown section 'Environmental Variables'"
76 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
77 /*@
78   PetscSharedTmp - Determines if all processors in a communicator share a
79   tmp directory or have different ones.
80 
81   Collective
82 
83   Input Parameter:
84 . comm - MPI_Communicator that may share tmp
85 
86   Output Parameter:
87 . shared - `PETSC_TRUE` or `PETSC_FALSE`
88 
89   Options Database Keys:
90 + -shared_tmp     - indicates the directory is known to be shared among the MPI processes
91 . -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
92 - -tmp tmpdir     - name of the directory you wish to use as tmp
93 
94   Environmental Variables:
95 + `PETSC_SHARED_TMP`     - indicates the directory is known to be shared among the MPI processes
96 . `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
97 - `PETSC_TMP`            - name of the directory you wish to use as tmp
98 
99   Level: developer
100 
101   Notes:
102   Stores the status as a MPI attribute so it does not have
103   to be redetermined each time.
104 
105   Assumes that all processors in a communicator either
106   1) have a common tmp or
107   2) each has a separate tmp
108   eventually we can write a fancier one that determines which processors
109   share a common tmp.
110 
111   This will be very slow on runs with a large number of processors since
112   it requires O(p*p) file opens.
113 
114   If the environmental variable `PETSC_TMP` is set it will use this directory
115   as the "tmp" directory.
116 
117 .seealso: `PetscGetTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
118 @*/
119 PetscErrorCode PetscSharedTmp(MPI_Comm comm, PetscBool *shared)
120 {
121   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
122   PetscBool   flg, iflg;
123   FILE       *fd;
124   int         err;
125 
126   PetscFunctionBegin;
127   PetscCallMPI(MPI_Comm_size(comm, &size));
128   if (size == 1) {
129     *shared = PETSC_TRUE;
130     PetscFunctionReturn(PETSC_SUCCESS);
131   }
132 
133   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_TMP", NULL, 0, &flg));
134   if (flg) {
135     *shared = PETSC_TRUE;
136     PetscFunctionReturn(PETSC_SUCCESS);
137   }
138 
139   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_TMP", NULL, 0, &flg));
140   if (flg) {
141     *shared = PETSC_FALSE;
142     PetscFunctionReturn(PETSC_SUCCESS);
143   }
144 
145   if (Petsc_SharedTmp_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedTmp_keyval, NULL));
146 
147   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedTmp_keyval, (void **)&tagvalp, (int *)&iflg));
148   if (!iflg) {
149     char filename[PETSC_MAX_PATH_LEN], tmpname[PETSC_MAX_PATH_LEN];
150 
151     /* This communicator does not yet have a shared tmp attribute */
152     PetscCall(PetscMalloc1(1, &tagvalp));
153     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedTmp_keyval, tagvalp));
154 
155     PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", tmpname, 238, &iflg));
156     if (!iflg) {
157       PetscCall(PetscStrncpy(filename, "/tmp", sizeof(filename)));
158     } else {
159       PetscCall(PetscStrncpy(filename, tmpname, sizeof(filename)));
160     }
161 
162     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
163     PetscCallMPI(MPI_Comm_rank(comm, &rank));
164 
165     /* each processor creates a /tmp file and all the later ones check */
166     /* this makes sure no subset of processors is shared */
167     *shared = PETSC_FALSE;
168     for (i = 0; i < size - 1; i++) {
169       if (rank == i) {
170         fd = fopen(filename, "w");
171         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
172         err = fclose(fd);
173         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
174       }
175       PetscCallMPI(MPI_Barrier(comm));
176       if (rank >= i) {
177         fd = fopen(filename, "r");
178         if (fd) cnt = 1;
179         else cnt = 0;
180         if (fd) {
181           err = fclose(fd);
182           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
183         }
184       } else cnt = 0;
185 
186       PetscCallMPI(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
187       if (rank == i) unlink(filename);
188 
189       if (sum == size) {
190         *shared = PETSC_TRUE;
191         break;
192       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share /tmp ");
193     }
194     *tagvalp = (int)*shared;
195     PetscCall(PetscInfo(NULL, "processors %s %s\n", *shared ? "share" : "do NOT share", iflg ? tmpname : "/tmp"));
196   } else *shared = (PetscBool)*tagvalp;
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
200 // "Unknown section 'Environmental Variables'"
201 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
202 /*@
203   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones.
204 
205   Collective
206 
207   Input Parameter:
208 . comm - MPI_Communicator that may share working directory
209 
210   Output Parameter:
211 . shared - `PETSC_TRUE` or `PETSC_FALSE`
212 
213   Options Database Keys:
214 + -shared_working_directory     - indicates the directory is known to be shared among the MPI processes
215 - -not_shared_working_directory - indicates the directory is known to be not shared among the MPI processes
216 
217   Environmental Variables:
218 + `PETSC_SHARED_WORKING_DIRECTORY`     - indicates the directory is known to be shared among the MPI processes
219 - `PETSC_NOT_SHARED_WORKING_DIRECTORY` - indicates the directory is known to be not shared among the MPI processes
220 
221   Level: developer
222 
223   Notes:
224   Stores the status as a MPI attribute so it does not have to be redetermined each time.
225 
226   Assumes that all processors in a communicator either
227 .vb
228    1) have a common working directory or
229    2) each has a separate working directory
230 .ve
231   eventually we can write a fancier one that determines which processors share a common working directory.
232 
233   This will be very slow on runs with a large number of processors since it requires O(p*p) file opens.
234 
235 .seealso: `PetscGetTmp()`, `PetscSharedTmp()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
236 @*/
237 PetscErrorCode PetscSharedWorkingDirectory(MPI_Comm comm, PetscBool *shared)
238 {
239   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
240   PetscBool   flg, iflg;
241   FILE       *fd;
242   int         err;
243 
244   PetscFunctionBegin;
245   PetscCallMPI(MPI_Comm_size(comm, &size));
246   if (size == 1) {
247     *shared = PETSC_TRUE;
248     PetscFunctionReturn(PETSC_SUCCESS);
249   }
250 
251   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
252   if (flg) {
253     *shared = PETSC_TRUE;
254     PetscFunctionReturn(PETSC_SUCCESS);
255   }
256 
257   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
258   if (flg) {
259     *shared = PETSC_FALSE;
260     PetscFunctionReturn(PETSC_SUCCESS);
261   }
262 
263   if (Petsc_SharedWD_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedWD_keyval, NULL));
264 
265   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedWD_keyval, (void **)&tagvalp, (int *)&iflg));
266   if (!iflg) {
267     char filename[PETSC_MAX_PATH_LEN];
268 
269     /* This communicator does not yet have a shared  attribute */
270     PetscCall(PetscMalloc1(1, &tagvalp));
271     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedWD_keyval, tagvalp));
272 
273     PetscCall(PetscGetWorkingDirectory(filename, sizeof(filename) - 16));
274     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
275     PetscCallMPI(MPI_Comm_rank(comm, &rank));
276 
277     /* each processor creates a  file and all the later ones check */
278     /* this makes sure no subset of processors is shared */
279     *shared = PETSC_FALSE;
280     for (i = 0; i < size - 1; i++) {
281       if (rank == i) {
282         fd = fopen(filename, "w");
283         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
284         err = fclose(fd);
285         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
286       }
287       PetscCallMPI(MPI_Barrier(comm));
288       if (rank >= i) {
289         fd = fopen(filename, "r");
290         if (fd) cnt = 1;
291         else cnt = 0;
292         if (fd) {
293           err = fclose(fd);
294           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
295         }
296       } else cnt = 0;
297 
298       PetscCallMPI(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
299       if (rank == i) unlink(filename);
300 
301       if (sum == size) {
302         *shared = PETSC_TRUE;
303         break;
304       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share working directory");
305     }
306     *tagvalp = (int)*shared;
307   } else *shared = (PetscBool)*tagvalp;
308   PetscCall(PetscInfo(NULL, "processors %s working directory\n", (*shared) ? "shared" : "do NOT share"));
309   PetscFunctionReturn(PETSC_SUCCESS);
310 }
311 
312 /*@C
313   PetscFileRetrieve - Obtains a file from a URL or a compressed file
314   and copies into local disk space as uncompressed.
315 
316   Collective
317 
318   Input Parameters:
319 + comm - processors accessing the file
320 . url  - name of file, including entire URL (with or without .gz)
321 - llen - length of `localname`
322 
323   Output Parameters:
324 + localname - name of local copy of file - valid on only process zero
325 - found     - if found or retrieved the file - valid on all processes
326 
327   Level: developer
328 
329   Note:
330   if the file already exists locally this function just returns without downloading it.
331 
332 .seealso: `PetscDLLibraryRetrieve()`
333 @*/
334 PetscErrorCode PetscFileRetrieve(MPI_Comm comm, const char url[], char localname[], size_t llen, PetscBool *found)
335 {
336   char        buffer[PETSC_MAX_PATH_LEN], *par = NULL, *tlocalname = NULL, name[PETSC_MAX_PATH_LEN];
337   FILE       *fp;
338   PetscMPIInt rank;
339   size_t      len = 0;
340   PetscBool   flg1, flg2, flg3, flg4, download, compressed = PETSC_FALSE;
341 
342   PetscFunctionBegin;
343   PetscCallMPI(MPI_Comm_rank(comm, &rank));
344   if (rank == 0) {
345     *found = PETSC_FALSE;
346 
347     PetscCall(PetscStrstr(url, ".gz", &par));
348     if (par) {
349       PetscCall(PetscStrlen(par, &len));
350       if (len == 3) compressed = PETSC_TRUE;
351     }
352 
353     PetscCall(PetscStrncmp(url, "ftp://", 6, &flg1));
354     PetscCall(PetscStrncmp(url, "http://", 7, &flg2));
355     PetscCall(PetscStrncmp(url, "file://", 7, &flg3));
356     PetscCall(PetscStrncmp(url, "https://", 8, &flg4));
357     download = (PetscBool)(flg1 || flg2 || flg3 || flg4);
358 
359     if (!download && !compressed) {
360       PetscCall(PetscStrncpy(localname, url, llen));
361       PetscCall(PetscTestFile(url, 'r', found));
362       if (*found) {
363         PetscCall(PetscInfo(NULL, "Found file %s\n", url));
364       } else {
365         PetscCall(PetscInfo(NULL, "Did not find file %s\n", url));
366       }
367       goto done;
368     }
369 
370     /* look for uncompressed file in requested directory */
371     if (compressed) {
372       PetscCall(PetscStrncpy(localname, url, llen));
373       PetscCall(PetscStrstr(localname, ".gz", &par));
374       *par = 0; /* remove .gz extension */
375       PetscCall(PetscTestFile(localname, 'r', found));
376       if (*found) goto done;
377     }
378 
379     /* look for file in current directory */
380     PetscCall(PetscStrrchr(url, '/', &tlocalname));
381     PetscCall(PetscStrncpy(localname, tlocalname, llen));
382     if (compressed) {
383       PetscCall(PetscStrstr(localname, ".gz", &par));
384       *par = 0; /* remove .gz extension */
385     }
386     PetscCall(PetscTestFile(localname, 'r', found));
387     if (*found) goto done;
388 
389     if (download) {
390       /* local file is not already here so use curl to get it */
391       PetscCall(PetscStrncpy(localname, tlocalname, llen));
392       PetscCall(PetscStrncpy(buffer, "curl --fail --silent --show-error ", sizeof(buffer)));
393       PetscCall(PetscStrlcat(buffer, url, sizeof(buffer)));
394       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
395       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
396 #if defined(PETSC_HAVE_POPEN)
397       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
398       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
399 #else
400       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
401 #endif
402       PetscCall(PetscTestFile(localname, 'r', found));
403       if (*found) {
404         FILE *fd;
405         char  buf[1024], *str, *substring;
406 
407         /* check if the file didn't exist so it downloaded an HTML message instead */
408         fd = fopen(localname, "r");
409         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscTestFile() indicates %s exists but fopen() cannot open it", localname);
410         str = fgets(buf, sizeof(buf) - 1, fd);
411         while (str) {
412           PetscCall(PetscStrstr(buf, "<!DOCTYPE html>", &substring));
413           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
414           PetscCall(PetscStrstr(buf, "Not Found", &substring));
415           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
416           str = fgets(buf, sizeof(buf) - 1, fd);
417         }
418         fclose(fd);
419       }
420     } else if (compressed) {
421       PetscCall(PetscTestFile(url, 'r', found));
422       if (!*found) goto done;
423       PetscCall(PetscStrncpy(localname, url, llen));
424     }
425     if (compressed) {
426       PetscCall(PetscStrrchr(localname, '/', &tlocalname));
427       PetscCall(PetscStrncpy(name, tlocalname, PETSC_MAX_PATH_LEN));
428       PetscCall(PetscStrstr(name, ".gz", &par));
429       *par = 0; /* remove .gz extension */
430       /* uncompress file */
431       PetscCall(PetscStrncpy(buffer, "gzip -c -d ", sizeof(buffer)));
432       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
433       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
434       PetscCall(PetscStrlcat(buffer, name, sizeof(buffer)));
435 #if defined(PETSC_HAVE_POPEN)
436       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
437       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
438 #else
439       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
440 #endif
441       PetscCall(PetscStrncpy(localname, name, llen));
442       PetscCall(PetscTestFile(localname, 'r', found));
443     }
444   }
445 done:
446   PetscCallMPI(MPI_Bcast(found, 1, MPIU_BOOL, 0, comm));
447   PetscCallMPI(MPI_Bcast(localname, (PetscMPIInt)llen, MPI_CHAR, 0, comm));
448   PetscFunctionReturn(PETSC_SUCCESS);
449 }
450