Have open only the file that is being uploaded, not all files
author Karl O. Pinc <kop@karlpinc.com>
Thu, 14 Jan 2021 04:06:29 +0000 (22:06 -0600)
committer Karl O. Pinc <kop@karlpinc.com>
Thu, 14 Jan 2021 04:06:29 +0000 (22:06 -0600)
src/pgwui_bulk_upload/views/bulk_upload.py

index b897fd57ce185201348ac595c23345948a751881..d1a439089f21d6c18e6ed1c45a1fc417f59f486f 100644 (file)
@@ -17,6 +17,7 @@
 # License along with this program.  If not, see
 # <http://www.gnu.org/licenses/>.
 #
+from __future__ import generator_stop
 
 from pyramid.view import view_config
 import attr
@@ -129,19 +130,44 @@ class SaveBulkLine(DataLineProcessor):
 
 class UploadBulkData(UploadData):
     def __init__(self, fileo, file_fmt, null_data, null_rep,
-                 filepath, relation, trim=True):
+                 path, relation, trim=True):
         '''
-        fileo       Uploaded file object
+        fileo       Stream to uploaded file
         file_fmt    File format: CSV or TAB
         null_data   (boolean) Uploaded data contains nulls
         null_rep    Uploaded string representation of null
-        filepath    Path of file (zip_root relative)
+        path        pathlib path to file.
         relation    Relation, possibly schema qualified
         trim        (boolean) Trim leading and trailing whitespace?
+
+        filepath    Path of file (zip_root relative)
+        reopened    The file has been re-opened after reading the header line
         '''
         super().__init__(fileo, file_fmt, null_data, null_rep, trim=True)
-        self.filepath = filepath
+        self.path = path
+        self.filepath = archive_path(path)
         self.relation = relation
+        self.reopened = False
+
+    def _thunk(self):
+        '''Get the thunk which returns the next udl
+        '''
+        try:
+            yield from super()._thunk()
+        except ValueError:
+            # The file isn't open
+            if self.reopened:
+                super()._thunk().close()
+                return            # skip this file
+            # Reopen the file, now that it is time to upload it
+            self.reopened = True
+            try:
+                self.open_fileo(self.path.open('rb'))
+            except OSError as exp:
+                # If the file does not open on the next iteration it is skipped
+                raise ex.CannotReadError(self.filepath, exp)
+            next(super()._thunk())             # skip header
+            yield from super()._thunk()
 
 
 @attr.s
@@ -309,7 +335,7 @@ class UploadDir():
                                                    uf['upload_fmt'],
                                                    uf['upload_null'],
                                                    uf['null_rep'],
-                                                   archive_path(name),
+                                                   name,
                                                    fmap['relation'],
                                                    trim=uf['trim_upload']))
                 except core_ex.PGWUIError as exp:
@@ -531,6 +557,9 @@ class BulkTableUploadHandler(BaseTableUploadHandler):
                     map_description(fileinfo.filepath, fileinfo.relation),
                     fileinfo.filepath, fileinfo.relation)
                 errors.append(exp)
+            # Limit number of open files, close the file handle until it
+            # is time to read the file
+            fileinfo.close_fileo()
         if errors:
             raise core_ex.MultiError(errors)