From: Karl O. Pinc Date: Mon, 5 Aug 2024 03:34:53 +0000 (-0500) Subject: Process the uploaded file X-Git-Url: https://papio.biology.duke.edu/gitweb/?a=commitdiff_plain;h=32dafe22b2e13b14c3eb6e6d0fca73dfd20580cf;p=pgwui_upcen Process the uploaded file --- diff --git a/src/pgwui_upcen/exceptions.py b/src/pgwui_upcen/exceptions.py index 69a549b..06e2bfe 100644 --- a/src/pgwui_upcen/exceptions.py +++ b/src/pgwui_upcen/exceptions.py @@ -19,43 +19,31 @@ # Karl O. Pinc -from pgwui_common import exceptions as common_ex from pgwui_core import exceptions as core_ex +from pgwui_upload_core import exceptions as upload_ex -# PGWUI setting and user-supplied value related exceptions +# Data error related exceptions -class ExampleSetupError(common_ex.SetupError): - pass - - -class ExampleOnOffAskError(ExampleSetupError): - def __init__(self, value): - super().__init__( - ('The "pgwui:PGWUI_UpCen:example_on_off_ask" PGWUI setting ' - ' must be "on", "off", "ask", or not present'), - descr=f'Got: ({value})') +class UnknownGroupError(upload_ex.BadHeadersError): + def __init__(self, e, descr='', detail=''): + super().__init__(e, descr=descr, detail=detail) -class ExampleDetailedError(ExampleSetupError): +class NoDatesError(upload_ex.BadHeadersError): def __init__(self, e, descr='', detail=''): - super().__init__('Detailed error', descr=descr, detail=detail) + super().__init__(e, descr=descr, detail=detail) -# Data error related exception +# Line-by-line processing errors -class ExampleTooManyRowsError(core_ex.DataLineError): - ''' - Module exception rasied while line-by-line processing the uploaded - data. 
+class UnknownSnameError(core_ex.DataLineError): + def __init__(self, lineno, e, descr='', detail='', data=''): + super().__init__( + lineno, e, descr=descr, detail=detail, data=data) - lineno The line number - e The error message - descr More description of the error - detail Extra HTML describing the error - data The uploaded data - ''' + +class UnknownStatusError(core_ex.DataLineError): def __init__(self, lineno, e, descr='', detail='', data=''): - if descr is None: - descr = 'Too many rows of uploaded data' - super().__init__(e, lineno, descr, detail, data) + super().__init__( + lineno, e, descr=descr, detail=detail, data=data) diff --git a/src/pgwui_upcen/views/upcen.py b/src/pgwui_upcen/views/upcen.py index 03ccaa7..873b388 100644 --- a/src/pgwui_upcen/views/upcen.py +++ b/src/pgwui_upcen/views/upcen.py @@ -29,30 +29,72 @@ from pgwui_common.view import auth_base_view from pgwui_upload_core.views.upload import ( BaseTableUploadHandler, UploadCoreInitialPost, - match_insert_to_dataline, set_upload_response, ) from pgwui_upcen import exceptions as upcen_ex +import markupsafe import psycopg -# Useless usage of the imported exceptions to keep `make check` from failing -upcen_ex.ExampleOnOffAskError('42') +GROUP_EXISTS_QUERY = 'SELECT 1 FROM groups WHERE groups.gid = %s' +SNAME_EXISTS_QUERY = 'SELECT 1 FROM biography WHERE biography.sname = %s' +INSERT_STMT = ('INSERT INTO census (sname, date, grp, status, cen)' + ' values(%s, %s, %s, %s, TRUE);') log = logging.getLogger(__name__) -class SaveLine(pgwui_core.core.DataLineProcessor, - pgwui_core.core.ParameterExecutor): - def __init__(self, ue, uh, insert_stmt): +class SaveUCLine(pgwui_core.core.DataLineProcessor, + pgwui_core.core.ParameterExecutor): + def __init__(self, ue, uh, group, dates): ''' ue UploadEngine instance uh UploadHandler instance - insert_stmt Statement used to insert into db. 
- (psycopg3 formatted for substituion) + group The GROUPS.GID supplied in the heading + dates Iterable of the dates supplied in the heading + cols The number of columns supplied in the heading ''' super().__init__(ue, uh) - self.insert_stmt = insert_stmt + self.group = group + self.dates = dates + self.cols = len(dates) + 1 + + def get_sname(self, udl): + sname = udl[0].strip() + self.execute(SNAME_EXISTS_QUERY, sname) + if self.cur.fetchone() is None: + raise upcen_ex.UnknownSnameError( + udl.lineno, + 'The first column of each line must be a BIOGRAPHY.Sname', + descr=f'The BIOGRAPHY.Sname ({sname}) does not exist', + data=udl.raw) + return sname + + def check_data_cols(self, udl): + data_cols = len(udl.tuples) + if data_cols != self.cols: + descr = (f'There are {data_cols} data columns but there' + f" are {self.cols} columns in the file's first line") + if data_cols > self.cols: + raise core_ex.TooManyColsError( + udl.lineno, 'Too many data columns', descr=descr, + data=udl.raw) + else: + raise core_ex.TooFewColsError( + udl.lineno, 'Too few data columns', descr=descr, + data=udl.raw) + + def get_status(self, udl, code, date): + if code == '0': + return 'A' + elif code == '': + return 'C' + raise upcen_ex.UnknownStatusError( + udl.lineno, + 'Unknown status code', + descr=(f'The status code ({code}), appearing in the date' + f' ({date}) column, does not exist'), + data=udl.raw) def eat(self, udl): ''' @@ -60,8 +102,28 @@ class SaveLine(pgwui_core.core.DataLineProcessor, udl An UploadDataLine instance ''' - match_insert_to_dataline(udl, self.insert_stmt) - self.param_execute(self.insert_stmt.stmt, udl) + sname = self.get_sname(udl) + self.check_data_cols(udl) + + # Insert in reverse order, so the statdate is not constantly updated. + # Updating the statdate causes a lot of checking to be done. 
+ errors = [] + col = len(udl.tuples) - 2 + for code in reversed(udl.tuples[1:]): + date = self.dates[col] + code = code.strip() + if code != 'N': + try: + status = self.get_status(udl, code, date) + except upcen_ex.UnknownStatusError as err: + errors.append(err) + else: + self.param_execute( + INSERT_STMT, sname, date, self.group, status) + col -= 1 + + if errors: + raise core_ex.MultiError(errors) class UpCenUploadHandler(BaseTableUploadHandler): @@ -82,6 +144,27 @@ class UpCenUploadHandler(BaseTableUploadHandler): return pgwui_core.core.UploadDoubleTableForm().build( self, ip=ip) + def get_group(self, headers): + group = headers.tuples[0].strip() + self.execute(GROUP_EXISTS_QUERY, group) + if self.cur.fetchone() is None: + raise upcen_ex.UnknownGroupError( + 'The first column of the first line must be a GROUPS.GID', + descr=f'The GROUPS.GID ({group}) does not exist', + detail=('

The first line is:' + f'({markupsafe.escape(headers.raw)})

')) + return group + + def get_dates(self, headers): + dates = [date.strip() for date in headers.tuples[1:]] + if len(dates) == 0: + raise upcen_ex.NoDatesError( + 'The columns of the first line must contain dates', + descr="No dates were found", + detail=('

The first line is:' + f'({markupsafe.escape(headers.raw)})

')) + return dates + def factory(self, ue): '''Make a db loader function from an UploadEngine. @@ -93,13 +176,12 @@ class UpCenUploadHandler(BaseTableUploadHandler): ''' super().factory(ue) - qualified_table = self.uf['table'] try: - insert_stmt = self.build_insert_stmt( - ue.data, qualified_table, False, lambda x: x) - - return SaveLine(ue, self, insert_stmt) + headers = self.data.headers + group = self.get_group(headers) + dates = self.get_dates(headers) + return SaveUCLine(ue, self, group, dates) except (core_ex.PGWUIError, psycopg.Error): self.data.lineno = 0 # Don't report partially read number of lines raise