xref: /petsc/src/sys/fileio/fretrieve.c (revision fbf9dbe564678ed6eff1806adbc4c4f01b9743f4)
1 
2 /*
3       Code for opening and closing files.
4 */
5 #include <petscsys.h>
6 #if defined(PETSC_HAVE_PWD_H)
7   #include <pwd.h>
8 #endif
9 #include <ctype.h>
10 #include <sys/stat.h>
11 #if defined(PETSC_HAVE_UNISTD_H)
12   #include <unistd.h>
13 #endif
14 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
15   #include <sys/utsname.h>
16 #endif
17 #include <fcntl.h>
18 #include <time.h>
19 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
20   #include <sys/systeminfo.h>
21 #endif
22 #include <petsc/private/petscimpl.h>
23 
24 /*
25    Private routine to delete tmp/shared storage
26 
27    This is called by MPI, not by users, when communicator attributes are deleted
28 
29    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
30 
31 */
32 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm, PetscMPIInt keyval, void *count_val, void *extra_state)
33 {
34   PetscFunctionBegin;
35   PetscCallMPI(PetscInfo(NULL, "Deleting tmp/shared data in an MPI_Comm %ld\n", (long)comm));
36   PetscCallMPI(PetscFree(count_val));
37   PetscFunctionReturn(MPI_SUCCESS);
38 }
39 
40 /*@C
41    PetscGetTmp - Gets the name of the "tmp" directory, often this is `/tmp`
42 
43    Collective
44 
45    Input Parameters:
46 +  comm - MPI_Communicator that may share tmp
47 -  len - length of string to hold name
48 
49    Output Parameter:
50 .  dir - directory name
51 
52    Options Database Keys:
53 +    -shared_tmp  - indicates the directory is known to be shared among the MPI processes
54 .    -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
55 -    -tmp tmpdir - name of the directory you wish to use as tmp
56 
57    Environmental Variables:
58 +     `PETSC_SHARED_TMP` - indicates the directory is known to be shared among the MPI processes
59 .     `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
60 -     `PETSC_TMP` - name of the directory you wish to use as tmp
61 
62    Level: developer
63 
64 .seealso: `PetscSharedTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
65 @*/
66 PetscErrorCode PetscGetTmp(MPI_Comm comm, char dir[], size_t len)
67 {
68   PetscBool flg;
69 
70   PetscFunctionBegin;
71   PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", dir, len, &flg));
72   if (!flg) PetscCall(PetscStrncpy(dir, "/tmp", len));
73   PetscFunctionReturn(PETSC_SUCCESS);
74 }
75 
76 /*@C
77    PetscSharedTmp - Determines if all processors in a communicator share a
78          tmp directory or have different ones.
79 
80    Collective
81 
82    Input Parameter:
83 .  comm - MPI_Communicator that may share tmp
84 
85    Output Parameter:
86 .  shared - `PETSC_TRUE` or `PETSC_FALSE`
87 
88    Options Database Keys:
89 +    -shared_tmp  - indicates the directory is known to be shared among the MPI processes
90 .    -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
91 -    -tmp tmpdir - name of the directory you wish to use as tmp
92 
93    Environmental Variables:
94 +     `PETSC_SHARED_TMP`  - indicates the directory is known to be shared among the MPI processes
95 .     `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
96 -     `PETSC_TMP` - name of the directory you wish to use as tmp
97 
98    Level: developer
99 
100    Notes:
101    Stores the status as a MPI attribute so it does not have
102     to be redetermined each time.
103 
104       Assumes that all processors in a communicator either
105        1) have a common tmp or
106        2) each has a separate tmp
107       eventually we can write a fancier one that determines which processors
108       share a common tmp.
109 
110    This will be very slow on runs with a large number of processors since
111    it requires O(p*p) file opens.
112 
113    If the environmental variable `PETSC_TMP` is set it will use this directory
114   as the "tmp" directory.
115 
116 .seealso: `PetscGetTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
117 @*/
118 PetscErrorCode PetscSharedTmp(MPI_Comm comm, PetscBool *shared)
119 {
120   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
121   PetscBool   flg, iflg;
122   FILE       *fd;
123   int         err;
124 
125   PetscFunctionBegin;
126   PetscCallMPI(MPI_Comm_size(comm, &size));
127   if (size == 1) {
128     *shared = PETSC_TRUE;
129     PetscFunctionReturn(PETSC_SUCCESS);
130   }
131 
132   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_TMP", NULL, 0, &flg));
133   if (flg) {
134     *shared = PETSC_TRUE;
135     PetscFunctionReturn(PETSC_SUCCESS);
136   }
137 
138   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_TMP", NULL, 0, &flg));
139   if (flg) {
140     *shared = PETSC_FALSE;
141     PetscFunctionReturn(PETSC_SUCCESS);
142   }
143 
144   if (Petsc_SharedTmp_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedTmp_keyval, NULL));
145 
146   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedTmp_keyval, (void **)&tagvalp, (int *)&iflg));
147   if (!iflg) {
148     char filename[PETSC_MAX_PATH_LEN], tmpname[PETSC_MAX_PATH_LEN];
149 
150     /* This communicator does not yet have a shared tmp attribute */
151     PetscCall(PetscMalloc1(1, &tagvalp));
152     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedTmp_keyval, tagvalp));
153 
154     PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", tmpname, 238, &iflg));
155     if (!iflg) {
156       PetscCall(PetscStrncpy(filename, "/tmp", sizeof(filename)));
157     } else {
158       PetscCall(PetscStrncpy(filename, tmpname, sizeof(filename)));
159     }
160 
161     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
162     PetscCallMPI(MPI_Comm_rank(comm, &rank));
163 
164     /* each processor creates a /tmp file and all the later ones check */
165     /* this makes sure no subset of processors is shared */
166     *shared = PETSC_FALSE;
167     for (i = 0; i < size - 1; i++) {
168       if (rank == i) {
169         fd = fopen(filename, "w");
170         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
171         err = fclose(fd);
172         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
173       }
174       PetscCallMPI(MPI_Barrier(comm));
175       if (rank >= i) {
176         fd = fopen(filename, "r");
177         if (fd) cnt = 1;
178         else cnt = 0;
179         if (fd) {
180           err = fclose(fd);
181           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
182         }
183       } else cnt = 0;
184 
185       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
186       if (rank == i) unlink(filename);
187 
188       if (sum == size) {
189         *shared = PETSC_TRUE;
190         break;
191       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share /tmp ");
192     }
193     *tagvalp = (int)*shared;
194     PetscCall(PetscInfo(NULL, "processors %s %s\n", (*shared) ? "share" : "do NOT share", (iflg ? tmpname : "/tmp")));
195   } else *shared = (PetscBool)*tagvalp;
196   PetscFunctionReturn(PETSC_SUCCESS);
197 }
198 
199 /*@C
200   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones.
201 
202   Collective
203 
204   Input Parameter:
205 . comm - MPI_Communicator that may share working directory
206 
207   Output Parameter:
208 . shared - `PETSC_TRUE` or `PETSC_FALSE`
209 
210   Options Database Keys:
211 + -shared_working_directory - indicates the directory is known to be shared among the MPI processes
212 - -not_shared_working_directory - indicates the directory is known to be not shared among the MPI processes
213 
214   Environmental Variables:
215 + `PETSC_SHARED_WORKING_DIRECTORY` - indicates the directory is known to be shared among the MPI processes
216 - `PETSC_NOT_SHARED_WORKING_DIRECTORY` - indicates the directory is known to be not shared among the MPI processes
217 
218   Level: developer
219 
220   Notes:
221   Stores the status as a MPI attribute so it does not have to be redetermined each time.
222 
223   Assumes that all processors in a communicator either
224 .vb
225    1) have a common working directory or
226    2) each has a separate working directory
227 .ve
228   eventually we can write a fancier one that determines which processors share a common working directory.
229 
230   This will be very slow on runs with a large number of processors since it requires O(p*p) file opens.
231 
232 .seealso: `PetscGetTmp()`, `PetscSharedTmp()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
233 @*/
234 PetscErrorCode PetscSharedWorkingDirectory(MPI_Comm comm, PetscBool *shared)
235 {
236   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
237   PetscBool   flg, iflg;
238   FILE       *fd;
239   int         err;
240 
241   PetscFunctionBegin;
242   PetscCallMPI(MPI_Comm_size(comm, &size));
243   if (size == 1) {
244     *shared = PETSC_TRUE;
245     PetscFunctionReturn(PETSC_SUCCESS);
246   }
247 
248   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
249   if (flg) {
250     *shared = PETSC_TRUE;
251     PetscFunctionReturn(PETSC_SUCCESS);
252   }
253 
254   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
255   if (flg) {
256     *shared = PETSC_FALSE;
257     PetscFunctionReturn(PETSC_SUCCESS);
258   }
259 
260   if (Petsc_SharedWD_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedWD_keyval, NULL));
261 
262   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedWD_keyval, (void **)&tagvalp, (int *)&iflg));
263   if (!iflg) {
264     char filename[PETSC_MAX_PATH_LEN];
265 
266     /* This communicator does not yet have a shared  attribute */
267     PetscCall(PetscMalloc1(1, &tagvalp));
268     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedWD_keyval, tagvalp));
269 
270     PetscCall(PetscGetWorkingDirectory(filename, sizeof(filename) - 16));
271     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
272     PetscCallMPI(MPI_Comm_rank(comm, &rank));
273 
274     /* each processor creates a  file and all the later ones check */
275     /* this makes sure no subset of processors is shared */
276     *shared = PETSC_FALSE;
277     for (i = 0; i < size - 1; i++) {
278       if (rank == i) {
279         fd = fopen(filename, "w");
280         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
281         err = fclose(fd);
282         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
283       }
284       PetscCallMPI(MPI_Barrier(comm));
285       if (rank >= i) {
286         fd = fopen(filename, "r");
287         if (fd) cnt = 1;
288         else cnt = 0;
289         if (fd) {
290           err = fclose(fd);
291           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
292         }
293       } else cnt = 0;
294 
295       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
296       if (rank == i) unlink(filename);
297 
298       if (sum == size) {
299         *shared = PETSC_TRUE;
300         break;
301       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share working directory");
302     }
303     *tagvalp = (int)*shared;
304   } else *shared = (PetscBool)*tagvalp;
305   PetscCall(PetscInfo(NULL, "processors %s working directory\n", (*shared) ? "shared" : "do NOT share"));
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 /*@C
310     PetscFileRetrieve - Obtains a file from a URL or a compressed file
311         and copies into local disk space as uncompressed.
312 
313     Collective
314 
315     Input Parameters:
316 +   comm     - processors accessing the file
317 .   url      - name of file, including entire URL (with or without .gz)
318 -   llen     - length of `localname`
319 
320     Output Parameters:
321 +   localname - name of local copy of file - valid on only process zero
322 -   found - if found or retrieved the file - valid on all processes
323 
324     Level: developer
325 
326     Note:
327     if the file already exists locally this function just returns without downloading it.
328 
329 @*/
330 PetscErrorCode PetscFileRetrieve(MPI_Comm comm, const char url[], char localname[], size_t llen, PetscBool *found)
331 {
332   char        buffer[PETSC_MAX_PATH_LEN], *par = NULL, *tlocalname = NULL, name[PETSC_MAX_PATH_LEN];
333   FILE       *fp;
334   PetscMPIInt rank;
335   size_t      len = 0;
336   PetscBool   flg1, flg2, flg3, flg4, download, compressed = PETSC_FALSE;
337 
338   PetscFunctionBegin;
339   PetscCallMPI(MPI_Comm_rank(comm, &rank));
340   if (rank == 0) {
341     *found = PETSC_FALSE;
342 
343     PetscCall(PetscStrstr(url, ".gz", &par));
344     if (par) {
345       PetscCall(PetscStrlen(par, &len));
346       if (len == 3) compressed = PETSC_TRUE;
347     }
348 
349     PetscCall(PetscStrncmp(url, "ftp://", 6, &flg1));
350     PetscCall(PetscStrncmp(url, "http://", 7, &flg2));
351     PetscCall(PetscStrncmp(url, "file://", 7, &flg3));
352     PetscCall(PetscStrncmp(url, "https://", 8, &flg4));
353     download = (PetscBool)(flg1 || flg2 || flg3 || flg4);
354 
355     if (!download && !compressed) {
356       PetscCall(PetscStrncpy(localname, url, llen));
357       PetscCall(PetscTestFile(url, 'r', found));
358       if (*found) {
359         PetscCall(PetscInfo(NULL, "Found file %s\n", url));
360       } else {
361         PetscCall(PetscInfo(NULL, "Did not find file %s\n", url));
362       }
363       goto done;
364     }
365 
366     /* look for uncompressed file in requested directory */
367     if (compressed) {
368       PetscCall(PetscStrncpy(localname, url, llen));
369       PetscCall(PetscStrstr(localname, ".gz", &par));
370       *par = 0; /* remove .gz extension */
371       PetscCall(PetscTestFile(localname, 'r', found));
372       if (*found) goto done;
373     }
374 
375     /* look for file in current directory */
376     PetscCall(PetscStrrchr(url, '/', &tlocalname));
377     PetscCall(PetscStrncpy(localname, tlocalname, llen));
378     if (compressed) {
379       PetscCall(PetscStrstr(localname, ".gz", &par));
380       *par = 0; /* remove .gz extension */
381     }
382     PetscCall(PetscTestFile(localname, 'r', found));
383     if (*found) goto done;
384 
385     if (download) {
386       /* local file is not already here so use curl to get it */
387       PetscCall(PetscStrncpy(localname, tlocalname, llen));
388       PetscCall(PetscStrncpy(buffer, "curl --fail --silent --show-error ", sizeof(buffer)));
389       PetscCall(PetscStrlcat(buffer, url, sizeof(buffer)));
390       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
391       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
392 #if defined(PETSC_HAVE_POPEN)
393       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
394       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
395 #else
396       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
397 #endif
398       PetscCall(PetscTestFile(localname, 'r', found));
399       if (*found) {
400         FILE *fd;
401         char  buf[1024], *str, *substring;
402 
403         /* check if the file didn't exist so it downloaded an HTML message instead */
404         fd = fopen(localname, "r");
405         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscTestFile() indicates %s exists but fopen() cannot open it", localname);
406         str = fgets(buf, sizeof(buf) - 1, fd);
407         while (str) {
408           PetscCall(PetscStrstr(buf, "<!DOCTYPE html>", &substring));
409           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
410           PetscCall(PetscStrstr(buf, "Not Found", &substring));
411           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
412           str = fgets(buf, sizeof(buf) - 1, fd);
413         }
414         fclose(fd);
415       }
416     } else if (compressed) {
417       PetscCall(PetscTestFile(url, 'r', found));
418       if (!*found) goto done;
419       PetscCall(PetscStrncpy(localname, url, llen));
420     }
421     if (compressed) {
422       PetscCall(PetscStrrchr(localname, '/', &tlocalname));
423       PetscCall(PetscStrncpy(name, tlocalname, PETSC_MAX_PATH_LEN));
424       PetscCall(PetscStrstr(name, ".gz", &par));
425       *par = 0; /* remove .gz extension */
426       /* uncompress file */
427       PetscCall(PetscStrncpy(buffer, "gzip -c -d ", sizeof(buffer)));
428       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
429       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
430       PetscCall(PetscStrlcat(buffer, name, sizeof(buffer)));
431 #if defined(PETSC_HAVE_POPEN)
432       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
433       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
434 #else
435       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
436 #endif
437       PetscCall(PetscStrncpy(localname, name, llen));
438       PetscCall(PetscTestFile(localname, 'r', found));
439     }
440   }
441 done:
442   PetscCallMPI(MPI_Bcast(found, 1, MPIU_BOOL, 0, comm));
443   PetscCallMPI(MPI_Bcast(localname, llen, MPI_CHAR, 0, comm));
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446