From: Karl O. Pinc Date: Wed, 24 Oct 2018 22:13:03 +0000 (-0500) Subject: Rename module from pgwui_core to core X-Git-Url: https://papio.biology.duke.edu/gitweb/?a=commitdiff_plain;h=3e1d8926e182313774f9156ae644fde1e35f8bf3;p=pgwui_core Rename module from pgwui_core to core --- diff --git a/src/pgwui_core/core.py b/src/pgwui_core/core.py new file mode 100644 index 0000000..fe14ccf --- /dev/null +++ b/src/pgwui_core/core.py @@ -0,0 +1,1969 @@ +# Copyright (C) 2013, 2014, 2015, 2018 The Meme Factory, Inc. +# http://www.meme.com/ + +# This file is part of PGWUI_Core. +# +# This program is free software: you can redistribute it and/or +# modify it under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public +# License along with this program. If not, see +# . +# + +# Karl O. Pinc + +'''Python source code for pgwui_core module + +More general description of the module. +''' + +# There are main objects, and their subclasses, here: +# LoadedForm +# DBHandler (generally referred to a an "upload handler", at present) +# DBConnector (UploadEngine) +# +# See their documentation below. + +# Write python 3 compatible code. +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import +from __future__ import division + +from csv import reader as csv_reader +from cgi import escape as cgi_escape +import collections +import ast +import markupsafe +import hashlib +import io + +# We are not really using wtforms. We use it to (barely) +# interact with the html and post request but really +# we define our own classes to handle working memory +# and interacting with the session. +from wtforms import ( + Form, + BooleanField, + StringField, + RadioField, + PasswordField, + FileField) + +import psycopg2 + +from pgwui_core import form_constants + + +# Setup default values for forms. + +class UserInitialPost(): + db = form_constants.LIVE_DB + user = '' + password = '' + + +class AuthInitialPost(): + db = '' + user = '' + password = '' + + +class UploadFileInitialPost(AuthInitialPost): + upload_fmt = form_constants.CSV + datafile = '' + trim_upload = True + + +class UploadNullFileInitialPost(UploadFileInitialPost): + upload_null = True + null_rep = '' + + +class UploadTableInitialPost(UploadNullFileInitialPost): + table = '' + + +# The wtforms that suck data out of the html. + +class UserWTForm(Form): + '''The wtform used to connect to the db to authenticate .''' + # We don't actually use the labels, wanting the template to + # look (and render) like html, but I'll define them anyway + # just to keep my hand in. + user = StringField('User:') + password = PasswordField('Password:') + + ivals = UserInitialPost + + +class AuthWTForm(UserWTForm): + '''The wtform used to connect to any db and authenticate.''' + # We don't actually use the labels, wanting the template to + # look (and render) like html, but I'll define them anyway + # just to keep my hand in. + db = StringField('Database:') + + ivals = AuthInitialPost + + +class UploadFileWTForm(AuthWTForm): + '''The wtform used for uploading files.''' + # We don't actually use the labels, wanting the template to + # look (and render) like html, but I'll define them anyway + # just to keep my hand in. + upload_fmt = RadioField('Upload Format:', + choices=[('Upload CSV Data:', form_constants.CSV), + ('Upload tab delimited Data:', + form_constants.TAB)]) + datafile = FileField('File with CSV or Tab delimited Data:') + trim_upload = BooleanField('Trim Leading/Trailing Spaces:') + + ivals = UploadFileInitialPost + + +class UploadNullFileWTForm(UploadFileWTForm): + '''The wtform used for uploading files that may contain NULL.''' + # We don't actually use the labels, wanting the template to + # look (and render) like html, but I'll define them anyway + # just to keep my hand in. + upload_null = BooleanField('Upload NULL Values:') + null_rep = StringField('NULL Representation:') + + ivals = UploadNullFileInitialPost + + +class UploadTableWTForm(UploadNullFileWTForm): + '''The wtform used for uploading arbitrary data into tables.''' + table = StringField('Table or View:') + + ivals = UploadTableInitialPost + + +class LoadedForm(collections.MutableMapping): + ''' + Abstract class representing an upload form. + + Responsible for getting information into and out of + html forms. + + The user API is that it acts like a dict, but with extra methods. + + Attributes: + uh The UploadHandler instance using the form + + _store Where the real dict is kept + _form Instantaiated html form object (WTForms) + _fc Class handling html form + ''' + def __init__(self, uh, fc=None, data={}, **kwargs): + super(LoadedForm, self).__init__() + self.uh = uh + if data == {}: + store = dict(kwargs) + else: + store = dict(data) + store.update(kwargs) + self._store = store + self._fc = fc + + def __iter__(self): + return next(self._store) + + def __len__(self): + return len(self._store) + + def __getitem__(self, key): + return self._store[key] + + def __setitem__(self, key, value): + self._store[key] = value + + def __delitem__(self, key): + del self._store[key] + + def read(self): + ''' + In the children this loads form from pyramid self.uh.request + object and self._form and the session. + + In this case we instantiate _form and give it some defaults + ''' + self._form = self._fc(self.uh.request.POST, self._fc.ivals) + + def write(self, response, errors): + ''' + Produces the dict pyramid will use to render the form. + + Input: + response Dict of results from connection execution + errors List of errors from connection execution + ''' + response['errors'] = errors + return response + + +class CredsLoadedForm(LoadedForm): + ''' + Acts like a dict, but with extra methods. + Manages credentials (but not db) needed to authenticate. + + Attributes: + uh The UploadHandler instance using the form + user The username used to login + _form Instantaiated html form object (WXForms) + + Methods: + read() Load form from pyramid request object. + ''' + def __init__(self, uh, fc=UserWTForm, data={}, **kwargs): + data.update(kwargs) + super(CredsLoadedForm, self).__init__(uh, fc, data) + + def session_put(self, key, value): + ''' + Put data into the session. + + Input: + key The key + value The value + + Returns: + + Side effects: + Modifies session + + May be overridden by a subclass to keep data out + of the session. + ''' + self.uh.session[key] = value + + def read(self): + ''' + Read form data from the client + ''' + + # Read parent's data + super(CredsLoadedForm, self).read() + + # Read our form data + + # Keep password and user in the session. All the other + # form varaibles must be re-posted. + post = self.uh.request.POST + session = self.uh.request.session + + # Defaults are now in place in self._form for password + # and user. Ignore these since we want to know whether + # to go to the session for data values. + if 'password' in post: + self['password'] = post['password'] + self.session_put('password', self['password']) + elif 'password' in session: + self['password'] = session['password'] + + if 'user' in post: + self['user'] = post['user'] + self.session_put('user', self['user']) + elif 'user' in session: + self['user'] = session['user'] + else: + self['user'] = '' + + # Other, hidden, POST variables + if 'action' in post: + self['action'] = post['action'] + else: + self['action'] = '' + + def write(self, result, errors): + ''' + Produces the dict pyramid will use to render the form. + ''' + response = super(CredsLoadedForm, self).write(result, errors) + if ('havecreds' not in response + or ('havecreds' in response and not response['havecreds'])): + # We don't know if the credentials are good or + # we know they are bad. Keep them out of the session. + response['user'] = '' + response['password'] = '' + self.session_put('user', '') + self.session_put('password', '') + else: + response['user'] = self['user'] + return response + + +class AuthLoadedForm(CredsLoadedForm): + ''' + Acts like a dict, but with extra methods. + Manages form data needed to authenticate, including db to authenticate + in. + + Attributes: + uh The UploadHandler instance using the form + user The Usernamed used to login + db The db to login to + _form Instantaiated html form object (WXForms) + + ''' + def __init__(self, uh, fc=AuthWTForm, data={}, **kwargs): + data.update(kwargs) + super(AuthLoadedForm, self).__init__(uh, fc, data) + + def read(self): + ''' + Read form data from the client + ''' + + # Read parent's data + super(AuthLoadedForm, self).read() + + # Keep form variables handy + self['db'] = self._form.db.data + + def write(self, result, errors): + ''' + Produces the dict pyramid will use to render the form. + ''' + response = super(AuthLoadedForm, self).write(result, errors) + response['db'] = self['db'] + return response + + +class UploadFileForm(AuthLoadedForm): + ''' + Acts like a dict, but with extra methods. + + Attributes: + uh The UploadHandler instance using the form + + Methods: + read() Load form from pyramid request object. + ''' + def __init__(self, uh, fc=UploadFileWTForm, data={}, **kwargs): + data.update(kwargs) + super(UploadFileForm, self).__init__(uh, fc, data) + + def read(self): + ''' + Read form data from the client + ''' + + # Read parent's data + super(UploadFileForm, self).read() + + # Read our own data + self['upload_fmt'] = self._form.upload_fmt.data + self['trim_upload'] = self._form.trim_upload.data + + # Other POST variables involving a file + self['filename'] = '' + self['localfh'] = '' + if 'action' in self: + if self._form.datafile.data != '': + post = self.uh.request.POST + if hasattr(post['datafile'], 'filename'): + self['filename'] = post['datafile'].filename + if hasattr(post['datafile'], 'file'): + self['localfh'] = post['datafile'].file + + def write(self, result, errors): + ''' + Produces the dict pyramid will use to render the form. + ''' + if self['upload_fmt'] == form_constants.CSV: + csv_checked = form_constants.CHECKED + tab_checked = form_constants.UNCHECKED + else: + tab_checked = form_constants.CHECKED + csv_checked = form_constants.UNCHECKED + + if self['trim_upload']: + trim_upload_checked = form_constants.CHECKED + else: + trim_upload_checked = form_constants.UNCHECKED + + response = super(UploadFileForm, self).write(result, errors) + response['filename'] = self['filename'] + response['trim_upload'] = trim_upload_checked + response['csv_value'] = form_constants.CSV_VALUE + response['tab_value'] = form_constants.TAB_VALUE + response['csv_checked'] = csv_checked + response['tab_checked'] = tab_checked + return response + + +class UploadDoubleFileForm(UploadFileForm): + ''' + Acts like a dict, but with extra methods. + + Attributes: + uh The UploadHandler instance using the form + + Methods: + read() Load form from pyramid request object. + ''' + def __init__(self, uh, fc=UploadFileWTForm, data={}, **kwargs): + data.update(kwargs) + super(UploadDoubleFileForm, self).__init__(uh, fc, data) + + def read(self): + ''' + Read form data from the client + ''' + # Read parent's data + super(UploadDoubleFileForm, self).read() + + # Read our own data + post = self.uh.request.POST + if 'last_key' in post: + self['last_key'] = post['last_key'] + else: + self['last_key'] = '' + + def write(self, result, errors): + ''' + Produces the dict pyramid will use to render the form. + ''' + response = super(UploadDoubleFileForm, self).write(result, errors) + response['last_key'] = self['last_key'] + return response + + +class UploadNullFileForm(UploadFileForm): + ''' + Acts like a dict, but with extra methods. + + Attributes: + uh The UploadHandler instance using the form + + Methods: + read() Load form from pyramid request object. + ''' + def __init__(self, uh, fc=UploadNullFileWTForm, data={}, **kwargs): + data.update(kwargs) + super(UploadNullFileForm, self).__init__(uh, fc, data) + + def read(self): + ''' + Read form data from the client + ''' + + # Read parent's data + super(UploadNullFileForm, self).read() + + # Read our own data + self['upload_null'] = self._form.upload_null.data + self['null_rep'] = self._form.null_rep.data + + def write(self, result, errors): + ''' + Produces the dict pyramid will use to render the form. + ''' + if self['upload_null']: + upload_null_checked = form_constants.CHECKED + else: + upload_null_checked = form_constants.UNCHECKED + + response = super(UploadNullFileForm, self).write(result, errors) + response['upload_null'] = upload_null_checked + response['null_rep'] = self['null_rep'] + return response + + +class UploadTableForm(UploadNullFileForm): + ''' + Acts like a dict, but with extra methods. + + Attributes: + uh The UploadHandler instance using the form + + Methods: + read() Load form from pyramid request object. + ''' + def __init__(self, uh, fc=UploadTableWTForm, data={}, **kwargs): + data.update(kwargs) + super(UploadTableForm, self).__init__(uh, fc, data) + + def read(self): + ''' + Read form data from the client + ''' + + # Read parent's data + super(UploadTableForm, self).read() + + # Read our own data + self['table'] = self._form.table.data + + def write(self, result, errors): + ''' + Produces the dict pyramid will use to render the form. + ''' + response = super(UploadTableForm, self).write(result, errors) + response['table'] = self['table'] + return response + + +class UploadDoubleTableForm(UploadTableForm): + ''' + Acts like a dict, but with extra methods. + + Attributes: + uh The UploadHandler instance using the form + + Methods: + read() Load form from pyramid request object. + ''' + def __init__(self, uh, fc=UploadTableWTForm, data={}, **kwargs): + data.update(kwargs) + super(UploadDoubleTableForm, self).__init__(uh, fc, data) + + def read(self): + ''' + Read form data from the client + ''' + # Read parent's data + super(UploadDoubleTableForm, self).read() + + # Read our own data + post = self.uh.request.POST + if 'last_key' in post: + self['last_key'] = post['last_key'] + else: + self['last_key'] = '' + + def write(self, result, errors): + ''' + Produces the dict pyramid will use to render the form. + ''' + response = super(UploadDoubleTableForm, self).write(result, errors) + response['last_key'] = self['last_key'] + return response + + +# Utility functions + +def textualize(st): + ''' + Return pg representation of NULL for None when string st is None. + ''' + return 'NULL' if st is None else st + + +def is_checked(val): + '''Is the value something a html input entity recognizes as checked?''' + return val == form_constants.CHECKED + + +# Some functions for logging + +def escape_eol(string): + '''Change all the newlines to \n.''' + return string.replace('\n', r'\n') + + +def format_exception(ex): + '''Return an exception formatted as suffix text for a log message.''' + if isinstance(ex, psycopg2.DatabaseError): + diag = ex.diag + msg = diag.message_primary + if hasattr(diag, 'message_detail'): + msg += ', detail={0}'.format(escape_eol(diag.message_detail)) + if hasattr(diag, 'message_hint'): + msg += ', hint={0}'.format(escape_eol(diag.message_hint)) + elif isinstance(ex, UploadError): + msg = ex.e + if ex.descr != '': + msg += ' {0}'.format(escape_eol(ex.descr)) + if ex.detail != '': + msg += ' {0}'.format(escape_eol(ex.detail)) + else: + msg = '' + if msg != '': + msg = ': Error is ({0})'.format(msg) + return msg + + +# Error handling + +class UploadError(Exception): + ''' + Module exceptions are derived from this class. + + lineno Line number to which error pertains, if any + e The error message + descr More description of the error + detail Extra HTML describing the error + data Line of data causing problem, if any + + UploadError + * Error + * NoHeadersError + * NoDataError + * DBError + * DBCommitError + * DBDataLineError + * DataLineError + * TooManyColsError + ''' + def __init__(self, e, lineno='', descr='', detail='', data=''): + super(UploadError, self).__init__() + self.lineno = lineno + self.e = e + self.descr = descr + self.detail = detail + self.data = data + + def __str__(self): + out = 'error ({0})'.format(self.e) + if self.lineno != '': + out = '{0}: lineno ({1})'.format(out, self.lineno) + if self.descr != '': + out = '{0}: descr ({1})'.format(out, self.descr) + if self.detail != '': + out = '{0}: detail ({1})'.format(out, self.detail) + if self.data != '': + out = '{0}: data ({1})'.format(out, self.data) + return out + + +class Error(UploadError): + ''' + Module exceptions rasied while setting up to read data lines + are derived from this class. + + e The error message + descr More description of the error + detail Extra HTML describing the error + ''' + def __init__(self, e, descr='', detail=''): + super(Error, self).__init__(e=e, descr=descr, detail=detail) + + +class NoFileError(Error): + '''No file uploaded''' + def __init__(self, e, descr='', detail=''): + super(NoFileError, self).__init__(e, descr, detail) + + +class NoDBError(Error): + '''No database name given''' + def __init__(self, e, descr='', detail=''): + super(NoDBError, self).__init__(e, descr, detail) + + +class NoUserError(Error): + '''No user name supplied''' + def __init__(self, e, descr='', detail=''): + super(NoUserError, self).__init__(e, descr, detail) + + +class AuthFailError(Error): + '''Unable to connect to the db''' + def __init__(self, e, descr='', detail=''): + super(AuthFailError, self).__init__(e, descr, detail) + + +class DryRunError(Error): + '''Rollback due to dry_run config option''' + def __init__(self, e, descr='', detail=''): + super(DryRunError, self).__init__(e, descr, detail) + + +class CSRFError(Error): + '''Invalid CSRF token''' + def __init__(self, e, descr='', detail=''): + super(CSRFError, self).__init__(e, descr, detail) + + +class NoHeadersError(Error): + '''No column headings found''' + def __init__(self, e, descr='', detail=''): + super(NoHeadersError, self).__init__(e, descr, detail) + + +class NoDataError(Error): + '''No data uploaded''' + def __init__(self, e, descr='', detail=''): + super(NoDataError, self).__init__(e, descr, detail) + + +class DuplicateUploadError(Error): + '''The same filename updated twice into the same db''' + def __init__(self, e, descr='', detail=''): + super(DuplicateUploadError, self).__init__(e, descr, detail) + + +class DataInconsistencyError(Error): + def __init__(self, e, descr='', detail=''): + super(DataInconsistencyError, self).__init__(e, descr, detail) + + +class DBError(Error): + '''psycopg2 raised an error''' + def __init__(self, pgexc, e='process your request'): + ''' + pgexc The psycopg2 exception object + e Description of what PG was doing + ''' + super(DBError, self).__init__( + 'PostgreSQL is unable to ' + e + ':', + 'It reports:', + self.html_blockquote(pgexc)) + + def html_blockquote(self, ex): + ''' + Produce an html formatted message from a psycopg2 DatabaseError + exception. + ''' + primary = cgi_escape(ex.diag.message_primary) + + if ex.diag.message_detail is None: + detail = '' + else: + detail = '
DETAIL: ' + cgi_escape(ex.diag.message_detail) + + if ex.diag.message_hint is None: + hint = '' + else: + hint = '
HINT: ' + cgi_escape(ex.diag.message_hint) + + return '

{0}: {1}{2}{3}

'.format( + ex.diag.severity, + primary, + detail, + hint) + + +class DBCommitError(DBError): + def __init__(self, pgexc): + super(DBCommitError, self).__init__(pgexc) + + +class DBDataLineError(DBError): + '''Database generated an error while the processor was running.''' + + def __init__(self, udl, pgexc): + ''' + udl An UploadDataLine instance + pgexc The psycopg2 exception object + ''' + super(DBDataLineError, self).__init__(pgexc) + self.lineno = udl.lineno + self.data = udl.raw + + +class DataLineError(UploadError): + ''' + Module exceptions rasied while line-by-line processing the uploaded + data are derived from this class. + + lineno The line number + e The error message + descr More description of the error + detail Extra HTML describing the error + data The uploaded data + ''' + def __init__(self, lineno, e, descr='', detail='', data=''): + super(DataLineError, self).__init__(e, lineno, descr, detail, data) + + +class TooManyColsError(DataLineError): + def __init__(self, lineno, e, descr='', detail='', data=''): + super(TooManyColsError, self).__init__(lineno, e, descr, detail, data) + + +# Upload processing + +class SQLCommand(object): + ''' + An SQL command that returns nothing + + Attributes: + stmt The statement, formatted for psycopg2 substitution + args Tuple of arguments used to substitute when executed. + ''' + def __init__(self, stmt, args, ec=None): + ''' + stmt The statement, formatted for psycopg2 substitution + args Tuple of arguments used to substitute when executed. + ec(ex) Produces the exception to raise an instance of on failure + Input: + ex The exception raised by psycopg2 + ''' + super(SQLCommand, self).__init__() + self.stmt = stmt + self.args = args + self.ec = ec + + def execute(self, cur): + ''' + Execute the sql statement. + + Input: + cur A psycopg2 cursor + + Side effects: + Does something in the db. + Can raise a psycopg2 error + ''' + try: + cur.execute(self.stmt, self.args) + except psycopg2.DatabaseError as ex: + if self.ec is None: + raise ex + else: + raise self.ec(ex) + + +class LogSQLCommand(SQLCommand): + '''An SQL command that logs success or failure.''' + def __init__(self, stmt, args, ec=None, + log_success=None, log_failure=None): + ''' + stmt The statement, formatted for psycopg2 substitution + args Tuple of arguments used to substitute when executed. + ec(ex) Produces the exception to raise an instance of on failure + Input: + ex The exception raised by psycopg2 + ''' + super(LogSQLCommand, self).__init__(stmt, args, ec) + self.log_success = log_success + self.log_failure = log_failure + + def execute(self, cur): + ''' + Execute the sql statement. + + Input: + cur A psycopg2 cursor + + Side effects: + Does something in the db. + Can raise a psycopg2 error + ''' + try: + super(LogSQLCommand, self).execute(cur) + except (UploadError, psycopg2.DatabaseError) as ex: + if self.log_failure: + self.log_failure(ex) + raise + else: + if self.log_success: + self.log_success() + + +class UploadLine(object): + ''' + Representation of a generic uploaded line + + Responsbile for getting data out of uploaded lines. + + Once out, the following attributes provide the user API: + raw The "raw" line as a string + tuples A sequence, the line split into columns + ''' + def __init__(self, line, stol, mapper): + ''' + line Uploaded line + lineno Line number + stol(st) Parse string st and convert into list + mapper(st) Maps st onto desired python representation + (Used for trimming whitespace) + ''' + super(UploadLine, self).__init__() + self.raw = line + self.tuples = [mapper(st) for st in stol(line)] + + +def doublequote(st): + ''' + Put string in double quotes escaping according to sql's + requirements for an identifier. + + Note that this does not work for unicode identifiers. + ''' + out = '"' + for ch in st: + if ch == '"': + out += '""' + else: + out += ch + return out + '"' + + +class UploadHeaders(UploadLine): + '''Uploaded heading line + + Derived from UploadLine + + raw The "raw" line as a string + tuples As a sequence + sql As a sql command fragment string + ''' + + def __init__(self, line, stol, mapper): + + if mapper(line) == '': + raise NoHeadersError('No column headings found on first line', + 'The first line is ({0})'.format(line)) + + super(UploadHeaders, self).__init__(line, stol, mapper) + self.sql = ', '.join(['"' + doublequote(st) + '"' + for st in self.tuples]) + + +class UploadDataLine(UploadLine): + '''Uploaded data line + + Derived from UploadLine + + raw The "raw" line as a string + tuples As a sequence + lineno The line number + ''' + def __init__(self, line, lineno, stol, mapper): + super(UploadDataLine, self).__init__(line, stol, mapper) + self.lineno = lineno + + +class DBData(object): + '''Data that is to modify the db. + + It is iterable, consisting of thunks which make an UploadedLine instance. + + lineno Number of lines processed (including header line) + + ''' + def __init__(self): + self.lineno = 0 + + def __iter__(self): + return self + + def __next__(self): + ''' + Iterator to return a thunk which, when called, delivers the + next object to be loaded into the db.''' + self.lineno += 1 + return self._thunk() + + def _thunk(): + ''' + Return a thunk which, when called, delivers the next + object to go into the db. + ''' + raise NotImplementedError() + + +class SQLData(DBData): + ''' + SQL statements returning no data that execute in the db. + + Attributes: + stmts List of SQLCommand instances + ''' + def __init__(self, stmts): + ''' + stmts List of SQLCommand instances + ''' + super(SQLData, self).__init__() + self.stmts = stmts + + def gen(stmts): + for stmt in stmts: + yield stmt + + self.stmt_gen = gen(stmts) + + def _thunk(self): + stmt = next(self.stmt_gen) + return lambda: stmt + + +class UploadData(DBData): + '''Uploaded data file + + It is iterable, consisting of thunks which make an UploadedLine instance. + + headers UploadedHeaders instance + cols Number of columns + lineno Number of lines processed (including header line) + + ''' + def __init__(self, fileo, file_fmt, null_data, null_rep, trim=True): + ''' + fileo Uploaded file object + file_fmt File format: CSV or TAB + null_data (boolean) Uploaded data contains nulls + null_rep Uploaded string representation of null + trim (boolean) Trim leading and trailing whitespace? + ''' + + def get_headers(): + try: + line = next(self._fileo) + except StopIteration: + raise NoDataError('Uploaded file contains no data') + else: + self.lineno += 1 + # Intuit the eol sequence + if line[-2:] == '\r\n': + eol = '\r\n' + line = line[:-2] + elif line[-1:] == '\n': + eol = '\n' + line = line[:-1] + elif line[-1:] == '\r': + eol = '\r' + line = line[:-1] + else: + # Give up + eol = '' + + if file_fmt == form_constants.CSV: + def func(st): + return next(csv_reader((st,))) + else: + def func(st): + return st.split('\t') + return (eol, UploadHeaders(line, func, do_trim)) + + super(UploadData, self).__init__() + + if trim: + def do_trim(st): + return st.strip() + else: + def do_trim(st): + return st + + # Read lines as text with line endings intact. + # newline='' required by csv.reader() + self._fileo = io.TextIOWrapper(fileo, newline='') + + # Get column headings + eol, self.headers = get_headers() + neg_eollen = -len(eol) + self.cols = len(self.headers.tuples) + + # Create parser to read raw lines into a list + if file_fmt == form_constants.CSV: + self._parser = lambda st: self._extend(st, + next(csv_reader((st,)))) + else: # Tab delimited format + def tab_parser(line): + if neg_eollen and line[neg_eollen:] == eol: + line = line[:neg_eollen] + return self._extend(line, line.split('\t')) + self._parser = tab_parser + + # Create mapper to frob data elements + if null_data: + null_rep = do_trim(null_rep) + + def mapper(st): + st = do_trim(st) + # psycopg2 maps None to NULL + return None if st == null_rep else st + self._mapper = mapper + else: + self._mapper = do_trim + + def _thunk(self): + ''' + Return a thunk which, when called, delivers the + UploadDataLine of the next line of the uploaded file.. + ''' + line = next(self._fileo) + return lambda: UploadDataLine(line, + self.lineno, + self._parser, + self._mapper) + + def _extend(self, line, seq): + '''Give the list as many elements as there are in the header. + + line line as string + seq line as sequence + + If there's too many elements, raise an error. + ''' + if len(seq) > self.cols: + raise TooManyColsError(self.lineno, + 'Line has too many columns', + 'More columns than column headings', + data=line) + return seq + ['' for i in range(len(seq) + 1, self.cols)] + + +class DataLineProcessor(object): + ''' + A processor supplied uploaded lines (UploadDataLine instances) + by an UploadEngine. The lines are expected to be put into a pg db. + + Abstract class not expected to be instantiated. + + Attributes: + ue UploadEngine instance + uh UploadHandler instance + cur psycopg2 cursor + + Methods: + eat(udl) Given an UploadDataLine instance put the line in the db. + ''' + def __init__(self, ue, uh): + ''' + ue UploadEngine instance + uh UploadHandler instance + ''' + super(DataLineProcessor, self).__init__() + self.ue = ue + self.uh = uh + self.cur = ue.cur + + def eat(udl): + ''' + Update the db with the content of an uploaded line + + udl An UploadDataLine instance + ''' + raise NotImplementedError + + +class NoOpProcessor(DataLineProcessor): + '''A processor that discards it's input.''' + def __init__(self, ue, uh): + ''' + ue UploadEngine instance + uh UploadHandler instance + cur psycopg2 cursor + ''' + super(NoOpProcessor, self).__init__(ue, uh) + + def eat(self, udl): + ''' + Executes an sql command in the db. + + udl A DataLine instance. + ''' + pass + + +class ExecuteSQL(DataLineProcessor): + def __init__(self, ue, uh): + ''' + ue UploadEngine instance + uh UploadHandler instance + cur psycopg2 cursor + ''' + super(ExecuteSQL, self).__init__(ue, uh) + + def eat(self, sqlc): + ''' + Executes an sql command in the db. + + sqlc An SQLCommand instance (a command and it's args) + ''' + sqlc.execute(self.cur) + + +class DBHandler(object): + ''' + Abstract class of a handler that modifies the db. + + The main logic goes here, including getting the data + into and out of the db. + + Responsible for: + Telling the form (LoadedForm) to get user data + Retrieval of data file or generation of other data to go in the db + (Generally involving an UploadData instance, and hence an UploadLine + instance.) + Validation of user supplied data; error raising: + The html form data + Whatever other data is uploaded or otherwise passed in + Pre-processing to prepare for getting data into the db + (Pre-processing is referred to as "setup" presently.) + Getting the individual bits of data into and out of the db + (by way of having a factory that produces a DataLineProcessor) + Post-processing to wrap up after the data is in the db + (Post-processing is referred to as "cleanup" presently.) + Rendering the html output + + Attributes: + request A pyramid request instance + uf An UploadForm instance + data Something (beyond what's on the form) that goes into db + ''' + def __init__(self, request): + ''' + request A pyramid request instance + ''' + super(DBHandler, self).__init__() + self.request = request + self.uf = self.make_form() + + def make_form(self): + ''' + Return an instantiation of the upload form needed + by the upload handler. + ''' + raise NotImplementedError + + def get_data(self): + ''' + Put something that will go into the db into the 'data' attribute. + ''' + raise NotImplementedError + + def val_input(self): + ''' + Validate input needed beyond that required to connect to the db. + + Note that this occurs after read() is called. + + This is expected to be replaced by it's subclass. + ''' + return [] + + def read(self): + '''Read the html form into the upload form instance''' + self.uf.read() + + def write(self, result, errors): + ''' + Return dict needed to render an html form + + Input: + errors List of Error instances + + Returns: + Dict pyramid will use to render the resulting form + Reserved keys: + errors A list of UploadError exceptions. + ''' + return self.uf.write(result, errors) + + def cleanup(self): + ''' + Called after all lines are processed to do any final + updates to the db. + ''' + pass + + def render(self, errors, response): + ''' + Return to pyramid what's needed to render the appropriate html + page. + + Input: + errors List of Error instances + response Db connection result dict + + Returns: + Dict pyramid will use to render the resulting form + ''' + response = self.write(response, errors) + return response + + +class SessionDBHandler(DBHandler): + ''' + A DBHandler that supports sessions. + + Attributes: + uf An UploadForm instance + session A pyramid session instance + ''' + def __init__(self, request): + ''' + request A pyramid request instance + ''' + super(SessionDBHandler, self).__init__(request) + self.session = self.request.session + + def write(self, result, errors): + ''' + Add csrf token into html form to protect sessions. + + Input: + errors List of Error instances + + Returns: + Dict pyramid will use to render the resulting form + Reserved keys: + errors A list of UploadError exceptions. + csrf_token Token for detecting CSRF. + ''' + response = super(SessionDBHandler, self).write(result, errors) + response['csrf_token'] = self.session.new_csrf_token() + return response + + +class UploadHandler(SessionDBHandler): + ''' + Handler for uploading a file. + + Attributes: + request A pyramid request instance + uf An UploadForm instance + data (optional) A DBData instance + ''' + def __init__(self, request): + ''' + request A pyramid request instance + ''' + super(UploadHandler, self).__init__(request) + + def factory(self, ue): + ''' + Takes an UploadEngine instance + Returns a DataLineProcessor instance + ''' + raise NotImplementedError + + def val_input(self): + ''' + Validate input needed beyond that required to connect to the db. + + Returns: + A list of Error instances + ''' + uf = self.uf + errors = super(UploadHandler, self).val_input() + + if uf['filename'] == '': + errors.append(NoFileError('No file supplied')) + + return errors + + def double_validator(self, errors): + '''Utility function that can optionally be called by + a val_input() function. It checks that the same file + has not been uploaded twice in succession to the same + db. + + `errors` + List of errors. Appended to. + ''' + uf = self.uf + if self.make_double_key() == uf['last_key']: + errors.append(DuplicateUploadError( + 'File just uploaded to this db', + ('File named ({0}) just uploaded' + .format(markupsafe.escape(uf['filename']))), + '

Try again to force upload.

')) + + def hash_sequence(self, seq): + '''Produce a hash of the elements of a sequence.''' + hash = hashlib.md5() + for item in seq: + hash.update((hash.hexdigest() + item).encode('utf-8')) + return hash.hexdigest() + + def make_double_key(self): + '''Generate unique value which identifies a double upload.''' + uf = self.uf + return self.hash_sequence((uf['db'], uf['filename'])) + + def write_double_key(self, response): + '''Utility function. Optionally called from within write() + to save a key which is later tested for to determine if + the same file is being uploaded twice in a row. + + This function keys by filename and db. + + The key is stored in the form and submitted in POST. + This way if the session times out due to inactivity + the double upload is still detected. + + Since the data is delivered to the client and the + client will likely see it anyway we use a hash not + for security but just to provide a convenient single + value that can be tested. + + Input: + response Dict used by pyramid to render html form. + + Side effects: + Modifies response. Adds 'last_key' entry used by form to store key. + ''' + response['last_key'] = self.make_double_key() + + def write(self, result, errors): + ''' + Add lines processed and number of errors into html form + shown the user. + + Input: + errors List of Error instances + + Returns: + Dict pyramid will use to render the resulting form + Reserved keys: + errors A list of UploadError exceptions. + csrf_token Token for detecting CSRF. + e_cnt Number of errors. + db_changed Boolean. Whether the db was changed. + ''' + response = super(UploadHandler, self).write(result, errors) + if hasattr(self, 'data'): + response['lines'] = self.data.lineno - 1 + response['e_cnt'] = len(errors) + response['db_changed'] = (not response['errors'] + and self.uf['action'] != '') + return response + + +class TabularFileUploadHandler(UploadHandler): + ''' + Handler for uploading a file with rows and columns and column headings. + + Attributes: + request A pyramid request instance + uf An UploadForm instance + data An UploadData instance + ''' + def __init__(self, request): + ''' + request A pyramid request instance + ''' + super(TabularFileUploadHandler, self).__init__(request) + + def cleanup(self): + '''Finish after processing all lines.''' + lines = self.ue.data.lineno + if lines == 1: + raise DataLineError( + 1, + 'File contains no data', + ('No lines found after ' + 'initial line of column headings')) + + +class DBConnector(object): + '''Abstract class supporting db connection and execution of a + processor function that alters the db content. + + Responsible for connecting to the db, transaction management, + and running the DataLineProcessor instances produced by the + DBConnector's (the UploadHandler's) factory. + Collects all errors raised during the processing. + Telling the uploadhandler to cleanup. This is done after + all upload processing and before commit or rollback + (and before the connection is closed). + + Attributes: + uh An UploadHandler instance. + cur A psycopg2 cursor instance + db Name of db to connect to + user User to connect to db + password Password to connect to db + action Upload form's hidden action variable + '' means not submitted, anything else means take action + + Methods: + run() Get a DataLineProcessor instance from the upload handler's + factory and feed it by iterating over data. + ''' + + def __init__(self, uh): + ''' + uh An UploadHandler instance + ''' + super(DBConnector, self).__init__() + + # Configuration and response management. + self.uh = uh + + def call_alter_db(self, conn): + ''' + Alter the contents of the db. + + Input: + conn A database connection + + Returns: + A list of Error instances + + Side Effects: + Alter's db + ''' + + raise NotImplementedError + + def no_connection_response(self): + ''' + The result dict to use when call_with_connection is not called. + Matches with the result dict of call_with_connection + + Returns: + Dict pyramid will use to render the resulting form. + Reserved keys: + havecreds Boolean. Supplied credentials can connect to db. + ''' + return {'havecreds': False} + + def nodberror_factory(self): + return NoDBError('No database name supplied') + + def nousererror_factory(self): + return NoUserError('No user name supplied as login credentials') + + def authfailerror_factory(self): + return AuthFailError('Unable to login', + 'Is the database, user, and password correct?') + + def dryrunerror_factory(self): + return DryRunError('Configured for "dry_run":' + ' Transaction deliberately rolled back') + + def upload_data(self, data, errors): + '''Put a DBData object into the db. + + Input: + data A DBData object. + errors A list of errors + + Side Effects: + Alters db content + Add errors to `errors`. + ''' + raise NotImplementedError + + def alter_db(self, conn): + ''' + Alter the contents of the db. + + Input: + conn A database connection + + Returns: + A list of Error instances + + Side Effects: + Alter's db + ''' + + errors = [] + try: + self.uh.get_data() + self.data = self.uh.data + self.cur = conn.cursor() + # (Cannot call uh until after self is fully + # initalized, including self.cur.) + processor = self.uh.factory(self) + except Error as ex: + errors.append(ex) + else: + try: + self.upload_data(processor, self.data, errors) + # Let upload handler finish + try: + self.uh.cleanup() + except UploadError as ex: + errors.append(ex) + finally: + self.cur.close() + return errors + + def call_with_connection(self, func): + ''' + Validate input, connect to the db, and do something with + the connection. + + func(conn) Call this function with the connection. + func(conn) must return a list of Error instances + + Returns: + (errors, response) + errors List of Error instances + response Dict pyramid will use to render the resulting form. + The dict returned by func(conn) plus reserved keys. + Reserved keys: + havecreds Boolean. Supplied credentials can + connect to db. + Side effects: + Raises errors, calls func(conn) + ''' + errors = [] + havecreds = False + response = {} + + if self.db == '': + errors.append(self.nodberror_factory()) + + if self.user == '': + errors.append(self.nousererror_factory()) + + newes = self.uh.val_input() + if newes: + if not errors: + havecreds = True + errors.extend(newes) + + if not errors: + registry = self.uh.request.registry + try: + conn = psycopg2.connect( + database=self.db, + user=self.user, + password=self.password, + host=registry.settings['pg_host'], + port=registry.settings['pg_port']) + except psycopg2.OperationalError: + errors = [self.authfailerror_factory()] + havecreds = False + else: + havecreds = True + errors = self.call_alter_db(func, conn) + + response.update({'havecreds': havecreds + }) + return (errors, response) + + def read_uh(self): + '''Read data into the upload handler.''' + self.uh.read() + + def read_db(self): + '''Read the db from the upload handler.''' + self.db = self.uh.uf['db'] + + def read_user(self): + '''Read the username from the upload handler.''' + self.user = self.uh.uf['user'] + + def read_password(self): + '''Read the password from the upload handler.''' + if 'password' in self.uh.uf: + self.password = self.uh.uf['password'] + else: + self.password = '' + + def read_action(self): + '''Read the action from the upload handler.''' + self.action = self.uh.uf['action'] + + def read(self): + '''Allows for custom engines to map what the handler + has to what they need. + ''' + self.read_uh() + self.read_db() + self.read_user() + self.read_password() + self.read_action() + + def run(self): + '''Call the alter_db function with the connection to alter the db, + render form and return Response + + Input: + + Returns: + (errors, response) + errors List of Error instantiations + response Dict containing connection result info + + Side effects: + Maintains session + (Whatever the processor() call does) + Outputs html + ''' + self.read() + + if self.action == 'u': + errors, response = self.call_with_connection(self.alter_db) + else: + errors = [] + response = self.no_connection_response() + + return self.uh.render(errors, response) + + +class NoTransactionEngine(DBConnector): + '''An engine that does not run things in a transaction. + + It also does not do CSRF checking. + + Attributes: + uh An UploadHandler instance. + data An UploadData instance of the uploaded data + cur A psycopg2 cursor instance + db Name of db to connect to + user User to connect to db + password Password to connect to db + action Upload form's hidden action variable + '' means not submitted, anything else means take action + + Methods: + run() Get a DataLineProcessor instance from the upload handler's + factory and feed it by iterating over data. + ''' + def __init__(self, uh): + ''' + uh An UploadHandler instance + ''' + super(NoTransactionEngine, self).__init__(uh) + + def call_alter_db(self, func, conn): + ''' + Call a database modification function with a db connection, + turning off the automatic wrapping of the activity in + a transaction. + + func(conn) Call this function with the connection. + func(conn) must return a list of Error instances + + Returns: + errors List of Error instances + Side effects: + Calls func(conn) + ''' + conn.set_session(autocommit=True) + errors = func(conn) + conn.close() + return errors + + def upload_data(self, processor, data, errors): + '''Put a DBData object into the db, without a wrapping transaction. + + Input: + processor A DataLineProcessor object. + data A DBData object. + errors A list of errors + + Side Effects: + Alters db content + Add errors to `errors`. + ''' + for thunk in data: + try: + udl = thunk() + except DataLineError as ex: + errors.append(ex) + else: + try: + processor.eat(udl) + except psycopg2.DatabaseError as ex: + errors.append(DBDataLineError(udl, ex)) + except DataLineError as ex: + errors.append(ex) + except DBError as ex: + errors.append(ex) + + +class UnsafeUploadEngine(DBConnector): + '''Abstract class supporting repeatedly running a processer to put + uploaded data into db. + + Attributes: + uh An UploadHandler instance. + data An UploadData instance of the uploaded data + cur A psycopg2 cursor instance + db Name of db to connect to + user User to connect to db + password Password to connect to db + action Upload form's hidden action variable + '' means not submitted, anything else means take action + + Methods: + run() Get a DataLineProcessor instance from the upload handler's + factory and feed it by iterating over data. + eat_old_line(udl, thunk) + Trap errors raised by the db while running thunk. + Report any errors as due to the udl UploadDataLine + instance. + ''' + def __init__(self, uh): + ''' + uh An UploadHandler instance + ''' + super(UnsafeUploadEngine, self).__init__(uh) + + def call_alter_db(self, func, conn): + ''' + Call a database modification function with a connection. + + func(conn) Call this function with the connection. + func(conn) must return a list of Error instances + + Returns: + errors List of Error instances + Side effects: + Calls func(conn) + ''' + errors = func(conn) + + if errors: + conn.rollback() + else: + if ast.literal_eval(self.uh.request.registry.settings['dry_run']): + conn.rollback() + errors.append(self.dryrunerror_factory()) + else: + try: + conn.commit() + except psycopg2.DatabaseError as ex: + errors.append(DBCommitError(ex)) + conn.close() + return errors + + def eat_old_line(self, udl, thunk): + ''' + Run thunk and report any errors raised by the db as due to + the UploadDataLine instance udl. + + Returns: + The result of running thunk. + ''' + try: + result = thunk() + except psycopg2.DatabaseError as ex: + raise DBDataLineError(udl, ex) + else: + return result + + def upload_data(self, processor, data, errors): + '''Put a DBData object into the db. + + Input: + processor A DataLineProcessor object. + data A DBData object. + errors A list of errors + + Side Effects: + Alters db content + Add errors to `errors`. + ''' + for thunk in data: + try: + udl = thunk() + except DataLineError as ex: + errors.append(ex) + else: + self.cur.execute( + 'SAVEPOINT line_savepoint;') + try: + processor.eat(udl) + except psycopg2.DatabaseError as ex: + self.cur.execute( + 'ROLLBACK TO line_savepoint;') + errors.append(DBDataLineError(udl, ex)) + except DataLineError as ex: + self.cur.execute( + 'ROLLBACK TO line_savepoint;') + errors.append(ex) + except DBError as ex: + self.cur.execute( + 'ROLLBACK TO line_savepoint;') + errors.append(ex) + else: + self.cur.execute( + 'RELEASE line_savepoint;') + + +class UploadEngine(UnsafeUploadEngine): + '''Abstract class supporting repeatedly running a processor + to alter db content. Adds cross-site request forgery checking + to the UnsafeUploadEngine class. + + Attributes: + uh An UploadHandler instance. + cur A psycopg2 cursor instance + db Name of db to connect to + user User to connect to db + password Password to connect to db + action Upload form's hidden action variable + '' means not submitted, anything else means take action + csrf_token The client's csrf_token + + Methods: + run() Get a DataLineProcessor instance from the upload handler's + factory and feed it by iterating over data. + ''' + + def __init__(self, uh): + ''' + uh An UploadHandler instance + ''' + super(UploadEngine, self).__init__(uh) + + def csrferror_factory(self): + return CSRFError( + 'Your request failed and you are now logged out', + ('This is a security measure. ' + 'Some possible causes are:'), + ('
    ' + '
  • Cookies are off in the browser
  • ' + '
  • Reload of an expired page
  • ' + '
      ' + '
    • Use of the Back button to (re)submit' + ' an old page
    • ' + '
    • An attacker has crafted a page which resubmits' + ' one of your old page submissions
    • ' + '
    ' + '
  • Server restart
  • ' + '
  • Login session expired due to inactivity
  • ' + '
')) + + def read_csrf_token(self): + '''Save the csrf token.''' + post = self.uh.request.POST + if 'csrf_token' in post: + self.csrf_token = post['csrf_token'] + else: + self.csrf_token = None + + def read(self): + '''Add a csrf_token.''' + super(UploadEngine, self).read() + self.read_csrf_token() + + def call_with_connection(self, func): + ''' + Connect to the db. + + func(conn) Call this function with the connection. + f(conn) must return a (errors, dict) tuple result, + errors list of Error instances + dict other results + + Returns: + (errors, response) + errors List of Error instances + response Dict pyramid will use to render the resulting form. + The dict returned by func(conn) plus reserved keys. + Reserved keys: + havecreds Boolean. Supplied credentials can + connect to db. + session_expired Boolean. Session expired. + Side effects: + Raises errors, calls func(conn) + ''' + session = self.uh.session + + token = session.get_csrf_token() + if token != self.csrf_token: + session.invalidate() + response = self.no_connection_response() + response['session_expired'] = True + return([self.csrferror_factory()], + response) + else: + (errors, response) = (super(UploadEngine, self) + .call_with_connection(func)) + response['session_expired'] = False + return (errors, response) diff --git a/src/pgwui_core/pgwui_core.py b/src/pgwui_core/pgwui_core.py deleted file mode 100644 index fe14ccf..0000000 --- a/src/pgwui_core/pgwui_core.py +++ /dev/null @@ -1,1969 +0,0 @@ -# Copyright (C) 2013, 2014, 2015, 2018 The Meme Factory, Inc. -# http://www.meme.com/ - -# This file is part of PGWUI_Core. -# -# This program is free software: you can redistribute it and/or -# modify it under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of -# the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public -# License along with this program. If not, see -# . -# - -# Karl O. Pinc - -'''Python source code for pgwui_core module - -More general description of the module. -''' - -# There are main objects, and their subclasses, here: -# LoadedForm -# DBHandler (generally referred to a an "upload handler", at present) -# DBConnector (UploadEngine) -# -# See their documentation below. - -# Write python 3 compatible code. -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division - -from csv import reader as csv_reader -from cgi import escape as cgi_escape -import collections -import ast -import markupsafe -import hashlib -import io - -# We are not really using wtforms. We use it to (barely) -# interact with the html and post request but really -# we define our own classes to handle working memory -# and interacting with the session. -from wtforms import ( - Form, - BooleanField, - StringField, - RadioField, - PasswordField, - FileField) - -import psycopg2 - -from pgwui_core import form_constants - - -# Setup default values for forms. - -class UserInitialPost(): - db = form_constants.LIVE_DB - user = '' - password = '' - - -class AuthInitialPost(): - db = '' - user = '' - password = '' - - -class UploadFileInitialPost(AuthInitialPost): - upload_fmt = form_constants.CSV - datafile = '' - trim_upload = True - - -class UploadNullFileInitialPost(UploadFileInitialPost): - upload_null = True - null_rep = '' - - -class UploadTableInitialPost(UploadNullFileInitialPost): - table = '' - - -# The wtforms that suck data out of the html. - -class UserWTForm(Form): - '''The wtform used to connect to the db to authenticate .''' - # We don't actually use the labels, wanting the template to - # look (and render) like html, but I'll define them anyway - # just to keep my hand in. - user = StringField('User:') - password = PasswordField('Password:') - - ivals = UserInitialPost - - -class AuthWTForm(UserWTForm): - '''The wtform used to connect to any db and authenticate.''' - # We don't actually use the labels, wanting the template to - # look (and render) like html, but I'll define them anyway - # just to keep my hand in. - db = StringField('Database:') - - ivals = AuthInitialPost - - -class UploadFileWTForm(AuthWTForm): - '''The wtform used for uploading files.''' - # We don't actually use the labels, wanting the template to - # look (and render) like html, but I'll define them anyway - # just to keep my hand in. - upload_fmt = RadioField('Upload Format:', - choices=[('Upload CSV Data:', form_constants.CSV), - ('Upload tab delimited Data:', - form_constants.TAB)]) - datafile = FileField('File with CSV or Tab delimited Data:') - trim_upload = BooleanField('Trim Leading/Trailing Spaces:') - - ivals = UploadFileInitialPost - - -class UploadNullFileWTForm(UploadFileWTForm): - '''The wtform used for uploading files that may contain NULL.''' - # We don't actually use the labels, wanting the template to - # look (and render) like html, but I'll define them anyway - # just to keep my hand in. - upload_null = BooleanField('Upload NULL Values:') - null_rep = StringField('NULL Representation:') - - ivals = UploadNullFileInitialPost - - -class UploadTableWTForm(UploadNullFileWTForm): - '''The wtform used for uploading arbitrary data into tables.''' - table = StringField('Table or View:') - - ivals = UploadTableInitialPost - - -class LoadedForm(collections.MutableMapping): - ''' - Abstract class representing an upload form. - - Responsible for getting information into and out of - html forms. - - The user API is that it acts like a dict, but with extra methods. - - Attributes: - uh The UploadHandler instance using the form - - _store Where the real dict is kept - _form Instantaiated html form object (WTForms) - _fc Class handling html form - ''' - def __init__(self, uh, fc=None, data={}, **kwargs): - super(LoadedForm, self).__init__() - self.uh = uh - if data == {}: - store = dict(kwargs) - else: - store = dict(data) - store.update(kwargs) - self._store = store - self._fc = fc - - def __iter__(self): - return next(self._store) - - def __len__(self): - return len(self._store) - - def __getitem__(self, key): - return self._store[key] - - def __setitem__(self, key, value): - self._store[key] = value - - def __delitem__(self, key): - del self._store[key] - - def read(self): - ''' - In the children this loads form from pyramid self.uh.request - object and self._form and the session. - - In this case we instantiate _form and give it some defaults - ''' - self._form = self._fc(self.uh.request.POST, self._fc.ivals) - - def write(self, response, errors): - ''' - Produces the dict pyramid will use to render the form. - - Input: - response Dict of results from connection execution - errors List of errors from connection execution - ''' - response['errors'] = errors - return response - - -class CredsLoadedForm(LoadedForm): - ''' - Acts like a dict, but with extra methods. - Manages credentials (but not db) needed to authenticate. - - Attributes: - uh The UploadHandler instance using the form - user The username used to login - _form Instantaiated html form object (WXForms) - - Methods: - read() Load form from pyramid request object. - ''' - def __init__(self, uh, fc=UserWTForm, data={}, **kwargs): - data.update(kwargs) - super(CredsLoadedForm, self).__init__(uh, fc, data) - - def session_put(self, key, value): - ''' - Put data into the session. - - Input: - key The key - value The value - - Returns: - - Side effects: - Modifies session - - May be overridden by a subclass to keep data out - of the session. - ''' - self.uh.session[key] = value - - def read(self): - ''' - Read form data from the client - ''' - - # Read parent's data - super(CredsLoadedForm, self).read() - - # Read our form data - - # Keep password and user in the session. All the other - # form varaibles must be re-posted. - post = self.uh.request.POST - session = self.uh.request.session - - # Defaults are now in place in self._form for password - # and user. Ignore these since we want to know whether - # to go to the session for data values. - if 'password' in post: - self['password'] = post['password'] - self.session_put('password', self['password']) - elif 'password' in session: - self['password'] = session['password'] - - if 'user' in post: - self['user'] = post['user'] - self.session_put('user', self['user']) - elif 'user' in session: - self['user'] = session['user'] - else: - self['user'] = '' - - # Other, hidden, POST variables - if 'action' in post: - self['action'] = post['action'] - else: - self['action'] = '' - - def write(self, result, errors): - ''' - Produces the dict pyramid will use to render the form. - ''' - response = super(CredsLoadedForm, self).write(result, errors) - if ('havecreds' not in response - or ('havecreds' in response and not response['havecreds'])): - # We don't know if the credentials are good or - # we know they are bad. Keep them out of the session. - response['user'] = '' - response['password'] = '' - self.session_put('user', '') - self.session_put('password', '') - else: - response['user'] = self['user'] - return response - - -class AuthLoadedForm(CredsLoadedForm): - ''' - Acts like a dict, but with extra methods. - Manages form data needed to authenticate, including db to authenticate - in. - - Attributes: - uh The UploadHandler instance using the form - user The Usernamed used to login - db The db to login to - _form Instantaiated html form object (WXForms) - - ''' - def __init__(self, uh, fc=AuthWTForm, data={}, **kwargs): - data.update(kwargs) - super(AuthLoadedForm, self).__init__(uh, fc, data) - - def read(self): - ''' - Read form data from the client - ''' - - # Read parent's data - super(AuthLoadedForm, self).read() - - # Keep form variables handy - self['db'] = self._form.db.data - - def write(self, result, errors): - ''' - Produces the dict pyramid will use to render the form. - ''' - response = super(AuthLoadedForm, self).write(result, errors) - response['db'] = self['db'] - return response - - -class UploadFileForm(AuthLoadedForm): - ''' - Acts like a dict, but with extra methods. - - Attributes: - uh The UploadHandler instance using the form - - Methods: - read() Load form from pyramid request object. - ''' - def __init__(self, uh, fc=UploadFileWTForm, data={}, **kwargs): - data.update(kwargs) - super(UploadFileForm, self).__init__(uh, fc, data) - - def read(self): - ''' - Read form data from the client - ''' - - # Read parent's data - super(UploadFileForm, self).read() - - # Read our own data - self['upload_fmt'] = self._form.upload_fmt.data - self['trim_upload'] = self._form.trim_upload.data - - # Other POST variables involving a file - self['filename'] = '' - self['localfh'] = '' - if 'action' in self: - if self._form.datafile.data != '': - post = self.uh.request.POST - if hasattr(post['datafile'], 'filename'): - self['filename'] = post['datafile'].filename - if hasattr(post['datafile'], 'file'): - self['localfh'] = post['datafile'].file - - def write(self, result, errors): - ''' - Produces the dict pyramid will use to render the form. - ''' - if self['upload_fmt'] == form_constants.CSV: - csv_checked = form_constants.CHECKED - tab_checked = form_constants.UNCHECKED - else: - tab_checked = form_constants.CHECKED - csv_checked = form_constants.UNCHECKED - - if self['trim_upload']: - trim_upload_checked = form_constants.CHECKED - else: - trim_upload_checked = form_constants.UNCHECKED - - response = super(UploadFileForm, self).write(result, errors) - response['filename'] = self['filename'] - response['trim_upload'] = trim_upload_checked - response['csv_value'] = form_constants.CSV_VALUE - response['tab_value'] = form_constants.TAB_VALUE - response['csv_checked'] = csv_checked - response['tab_checked'] = tab_checked - return response - - -class UploadDoubleFileForm(UploadFileForm): - ''' - Acts like a dict, but with extra methods. - - Attributes: - uh The UploadHandler instance using the form - - Methods: - read() Load form from pyramid request object. - ''' - def __init__(self, uh, fc=UploadFileWTForm, data={}, **kwargs): - data.update(kwargs) - super(UploadDoubleFileForm, self).__init__(uh, fc, data) - - def read(self): - ''' - Read form data from the client - ''' - # Read parent's data - super(UploadDoubleFileForm, self).read() - - # Read our own data - post = self.uh.request.POST - if 'last_key' in post: - self['last_key'] = post['last_key'] - else: - self['last_key'] = '' - - def write(self, result, errors): - ''' - Produces the dict pyramid will use to render the form. - ''' - response = super(UploadDoubleFileForm, self).write(result, errors) - response['last_key'] = self['last_key'] - return response - - -class UploadNullFileForm(UploadFileForm): - ''' - Acts like a dict, but with extra methods. - - Attributes: - uh The UploadHandler instance using the form - - Methods: - read() Load form from pyramid request object. - ''' - def __init__(self, uh, fc=UploadNullFileWTForm, data={}, **kwargs): - data.update(kwargs) - super(UploadNullFileForm, self).__init__(uh, fc, data) - - def read(self): - ''' - Read form data from the client - ''' - - # Read parent's data - super(UploadNullFileForm, self).read() - - # Read our own data - self['upload_null'] = self._form.upload_null.data - self['null_rep'] = self._form.null_rep.data - - def write(self, result, errors): - ''' - Produces the dict pyramid will use to render the form. - ''' - if self['upload_null']: - upload_null_checked = form_constants.CHECKED - else: - upload_null_checked = form_constants.UNCHECKED - - response = super(UploadNullFileForm, self).write(result, errors) - response['upload_null'] = upload_null_checked - response['null_rep'] = self['null_rep'] - return response - - -class UploadTableForm(UploadNullFileForm): - ''' - Acts like a dict, but with extra methods. - - Attributes: - uh The UploadHandler instance using the form - - Methods: - read() Load form from pyramid request object. - ''' - def __init__(self, uh, fc=UploadTableWTForm, data={}, **kwargs): - data.update(kwargs) - super(UploadTableForm, self).__init__(uh, fc, data) - - def read(self): - ''' - Read form data from the client - ''' - - # Read parent's data - super(UploadTableForm, self).read() - - # Read our own data - self['table'] = self._form.table.data - - def write(self, result, errors): - ''' - Produces the dict pyramid will use to render the form. - ''' - response = super(UploadTableForm, self).write(result, errors) - response['table'] = self['table'] - return response - - -class UploadDoubleTableForm(UploadTableForm): - ''' - Acts like a dict, but with extra methods. - - Attributes: - uh The UploadHandler instance using the form - - Methods: - read() Load form from pyramid request object. - ''' - def __init__(self, uh, fc=UploadTableWTForm, data={}, **kwargs): - data.update(kwargs) - super(UploadDoubleTableForm, self).__init__(uh, fc, data) - - def read(self): - ''' - Read form data from the client - ''' - # Read parent's data - super(UploadDoubleTableForm, self).read() - - # Read our own data - post = self.uh.request.POST - if 'last_key' in post: - self['last_key'] = post['last_key'] - else: - self['last_key'] = '' - - def write(self, result, errors): - ''' - Produces the dict pyramid will use to render the form. - ''' - response = super(UploadDoubleTableForm, self).write(result, errors) - response['last_key'] = self['last_key'] - return response - - -# Utility functions - -def textualize(st): - ''' - Return pg representation of NULL for None when string st is None. - ''' - return 'NULL' if st is None else st - - -def is_checked(val): - '''Is the value something a html input entity recognizes as checked?''' - return val == form_constants.CHECKED - - -# Some functions for logging - -def escape_eol(string): - '''Change all the newlines to \n.''' - return string.replace('\n', r'\n') - - -def format_exception(ex): - '''Return an exception formatted as suffix text for a log message.''' - if isinstance(ex, psycopg2.DatabaseError): - diag = ex.diag - msg = diag.message_primary - if hasattr(diag, 'message_detail'): - msg += ', detail={0}'.format(escape_eol(diag.message_detail)) - if hasattr(diag, 'message_hint'): - msg += ', hint={0}'.format(escape_eol(diag.message_hint)) - elif isinstance(ex, UploadError): - msg = ex.e - if ex.descr != '': - msg += ' {0}'.format(escape_eol(ex.descr)) - if ex.detail != '': - msg += ' {0}'.format(escape_eol(ex.detail)) - else: - msg = '' - if msg != '': - msg = ': Error is ({0})'.format(msg) - return msg - - -# Error handling - -class UploadError(Exception): - ''' - Module exceptions are derived from this class. - - lineno Line number to which error pertains, if any - e The error message - descr More description of the error - detail Extra HTML describing the error - data Line of data causing problem, if any - - UploadError - * Error - * NoHeadersError - * NoDataError - * DBError - * DBCommitError - * DBDataLineError - * DataLineError - * TooManyColsError - ''' - def __init__(self, e, lineno='', descr='', detail='', data=''): - super(UploadError, self).__init__() - self.lineno = lineno - self.e = e - self.descr = descr - self.detail = detail - self.data = data - - def __str__(self): - out = 'error ({0})'.format(self.e) - if self.lineno != '': - out = '{0}: lineno ({1})'.format(out, self.lineno) - if self.descr != '': - out = '{0}: descr ({1})'.format(out, self.descr) - if self.detail != '': - out = '{0}: detail ({1})'.format(out, self.detail) - if self.data != '': - out = '{0}: data ({1})'.format(out, self.data) - return out - - -class Error(UploadError): - ''' - Module exceptions rasied while setting up to read data lines - are derived from this class. - - e The error message - descr More description of the error - detail Extra HTML describing the error - ''' - def __init__(self, e, descr='', detail=''): - super(Error, self).__init__(e=e, descr=descr, detail=detail) - - -class NoFileError(Error): - '''No file uploaded''' - def __init__(self, e, descr='', detail=''): - super(NoFileError, self).__init__(e, descr, detail) - - -class NoDBError(Error): - '''No database name given''' - def __init__(self, e, descr='', detail=''): - super(NoDBError, self).__init__(e, descr, detail) - - -class NoUserError(Error): - '''No user name supplied''' - def __init__(self, e, descr='', detail=''): - super(NoUserError, self).__init__(e, descr, detail) - - -class AuthFailError(Error): - '''Unable to connect to the db''' - def __init__(self, e, descr='', detail=''): - super(AuthFailError, self).__init__(e, descr, detail) - - -class DryRunError(Error): - '''Rollback due to dry_run config option''' - def __init__(self, e, descr='', detail=''): - super(DryRunError, self).__init__(e, descr, detail) - - -class CSRFError(Error): - '''Invalid CSRF token''' - def __init__(self, e, descr='', detail=''): - super(CSRFError, self).__init__(e, descr, detail) - - -class NoHeadersError(Error): - '''No column headings found''' - def __init__(self, e, descr='', detail=''): - super(NoHeadersError, self).__init__(e, descr, detail) - - -class NoDataError(Error): - '''No data uploaded''' - def __init__(self, e, descr='', detail=''): - super(NoDataError, self).__init__(e, descr, detail) - - -class DuplicateUploadError(Error): - '''The same filename updated twice into the same db''' - def __init__(self, e, descr='', detail=''): - super(DuplicateUploadError, self).__init__(e, descr, detail) - - -class DataInconsistencyError(Error): - def __init__(self, e, descr='', detail=''): - super(DataInconsistencyError, self).__init__(e, descr, detail) - - -class DBError(Error): - '''psycopg2 raised an error''' - def __init__(self, pgexc, e='process your request'): - ''' - pgexc The psycopg2 exception object - e Description of what PG was doing - ''' - super(DBError, self).__init__( - 'PostgreSQL is unable to ' + e + ':', - 'It reports:', - self.html_blockquote(pgexc)) - - def html_blockquote(self, ex): - ''' - Produce an html formatted message from a psycopg2 DatabaseError - exception. - ''' - primary = cgi_escape(ex.diag.message_primary) - - if ex.diag.message_detail is None: - detail = '' - else: - detail = '
DETAIL: ' + cgi_escape(ex.diag.message_detail) - - if ex.diag.message_hint is None: - hint = '' - else: - hint = '
HINT: ' + cgi_escape(ex.diag.message_hint) - - return '

{0}: {1}{2}{3}

'.format( - ex.diag.severity, - primary, - detail, - hint) - - -class DBCommitError(DBError): - def __init__(self, pgexc): - super(DBCommitError, self).__init__(pgexc) - - -class DBDataLineError(DBError): - '''Database generated an error while the processor was running.''' - - def __init__(self, udl, pgexc): - ''' - udl An UploadDataLine instance - pgexc The psycopg2 exception object - ''' - super(DBDataLineError, self).__init__(pgexc) - self.lineno = udl.lineno - self.data = udl.raw - - -class DataLineError(UploadError): - ''' - Module exceptions rasied while line-by-line processing the uploaded - data are derived from this class. - - lineno The line number - e The error message - descr More description of the error - detail Extra HTML describing the error - data The uploaded data - ''' - def __init__(self, lineno, e, descr='', detail='', data=''): - super(DataLineError, self).__init__(e, lineno, descr, detail, data) - - -class TooManyColsError(DataLineError): - def __init__(self, lineno, e, descr='', detail='', data=''): - super(TooManyColsError, self).__init__(lineno, e, descr, detail, data) - - -# Upload processing - -class SQLCommand(object): - ''' - An SQL command that returns nothing - - Attributes: - stmt The statement, formatted for psycopg2 substitution - args Tuple of arguments used to substitute when executed. - ''' - def __init__(self, stmt, args, ec=None): - ''' - stmt The statement, formatted for psycopg2 substitution - args Tuple of arguments used to substitute when executed. - ec(ex) Produces the exception to raise an instance of on failure - Input: - ex The exception raised by psycopg2 - ''' - super(SQLCommand, self).__init__() - self.stmt = stmt - self.args = args - self.ec = ec - - def execute(self, cur): - ''' - Execute the sql statement. - - Input: - cur A psycopg2 cursor - - Side effects: - Does something in the db. - Can raise a psycopg2 error - ''' - try: - cur.execute(self.stmt, self.args) - except psycopg2.DatabaseError as ex: - if self.ec is None: - raise ex - else: - raise self.ec(ex) - - -class LogSQLCommand(SQLCommand): - '''An SQL command that logs success or failure.''' - def __init__(self, stmt, args, ec=None, - log_success=None, log_failure=None): - ''' - stmt The statement, formatted for psycopg2 substitution - args Tuple of arguments used to substitute when executed. - ec(ex) Produces the exception to raise an instance of on failure - Input: - ex The exception raised by psycopg2 - ''' - super(LogSQLCommand, self).__init__(stmt, args, ec) - self.log_success = log_success - self.log_failure = log_failure - - def execute(self, cur): - ''' - Execute the sql statement. - - Input: - cur A psycopg2 cursor - - Side effects: - Does something in the db. - Can raise a psycopg2 error - ''' - try: - super(LogSQLCommand, self).execute(cur) - except (UploadError, psycopg2.DatabaseError) as ex: - if self.log_failure: - self.log_failure(ex) - raise - else: - if self.log_success: - self.log_success() - - -class UploadLine(object): - ''' - Representation of a generic uploaded line - - Responsbile for getting data out of uploaded lines. - - Once out, the following attributes provide the user API: - raw The "raw" line as a string - tuples A sequence, the line split into columns - ''' - def __init__(self, line, stol, mapper): - ''' - line Uploaded line - lineno Line number - stol(st) Parse string st and convert into list - mapper(st) Maps st onto desired python representation - (Used for trimming whitespace) - ''' - super(UploadLine, self).__init__() - self.raw = line - self.tuples = [mapper(st) for st in stol(line)] - - -def doublequote(st): - ''' - Put string in double quotes escaping according to sql's - requirements for an identifier. - - Note that this does not work for unicode identifiers. - ''' - out = '"' - for ch in st: - if ch == '"': - out += '""' - else: - out += ch - return out + '"' - - -class UploadHeaders(UploadLine): - '''Uploaded heading line - - Derived from UploadLine - - raw The "raw" line as a string - tuples As a sequence - sql As a sql command fragment string - ''' - - def __init__(self, line, stol, mapper): - - if mapper(line) == '': - raise NoHeadersError('No column headings found on first line', - 'The first line is ({0})'.format(line)) - - super(UploadHeaders, self).__init__(line, stol, mapper) - self.sql = ', '.join(['"' + doublequote(st) + '"' - for st in self.tuples]) - - -class UploadDataLine(UploadLine): - '''Uploaded data line - - Derived from UploadLine - - raw The "raw" line as a string - tuples As a sequence - lineno The line number - ''' - def __init__(self, line, lineno, stol, mapper): - super(UploadDataLine, self).__init__(line, stol, mapper) - self.lineno = lineno - - -class DBData(object): - '''Data that is to modify the db. - - It is iterable, consisting of thunks which make an UploadedLine instance. - - lineno Number of lines processed (including header line) - - ''' - def __init__(self): - self.lineno = 0 - - def __iter__(self): - return self - - def __next__(self): - ''' - Iterator to return a thunk which, when called, delivers the - next object to be loaded into the db.''' - self.lineno += 1 - return self._thunk() - - def _thunk(): - ''' - Return a thunk which, when called, delivers the next - object to go into the db. - ''' - raise NotImplementedError() - - -class SQLData(DBData): - ''' - SQL statements returning no data that execute in the db. - - Attributes: - stmts List of SQLCommand instances - ''' - def __init__(self, stmts): - ''' - stmts List of SQLCommand instances - ''' - super(SQLData, self).__init__() - self.stmts = stmts - - def gen(stmts): - for stmt in stmts: - yield stmt - - self.stmt_gen = gen(stmts) - - def _thunk(self): - stmt = next(self.stmt_gen) - return lambda: stmt - - -class UploadData(DBData): - '''Uploaded data file - - It is iterable, consisting of thunks which make an UploadedLine instance. - - headers UploadedHeaders instance - cols Number of columns - lineno Number of lines processed (including header line) - - ''' - def __init__(self, fileo, file_fmt, null_data, null_rep, trim=True): - ''' - fileo Uploaded file object - file_fmt File format: CSV or TAB - null_data (boolean) Uploaded data contains nulls - null_rep Uploaded string representation of null - trim (boolean) Trim leading and trailing whitespace? - ''' - - def get_headers(): - try: - line = next(self._fileo) - except StopIteration: - raise NoDataError('Uploaded file contains no data') - else: - self.lineno += 1 - # Intuit the eol sequence - if line[-2:] == '\r\n': - eol = '\r\n' - line = line[:-2] - elif line[-1:] == '\n': - eol = '\n' - line = line[:-1] - elif line[-1:] == '\r': - eol = '\r' - line = line[:-1] - else: - # Give up - eol = '' - - if file_fmt == form_constants.CSV: - def func(st): - return next(csv_reader((st,))) - else: - def func(st): - return st.split('\t') - return (eol, UploadHeaders(line, func, do_trim)) - - super(UploadData, self).__init__() - - if trim: - def do_trim(st): - return st.strip() - else: - def do_trim(st): - return st - - # Read lines as text with line endings intact. - # newline='' required by csv.reader() - self._fileo = io.TextIOWrapper(fileo, newline='') - - # Get column headings - eol, self.headers = get_headers() - neg_eollen = -len(eol) - self.cols = len(self.headers.tuples) - - # Create parser to read raw lines into a list - if file_fmt == form_constants.CSV: - self._parser = lambda st: self._extend(st, - next(csv_reader((st,)))) - else: # Tab delimited format - def tab_parser(line): - if neg_eollen and line[neg_eollen:] == eol: - line = line[:neg_eollen] - return self._extend(line, line.split('\t')) - self._parser = tab_parser - - # Create mapper to frob data elements - if null_data: - null_rep = do_trim(null_rep) - - def mapper(st): - st = do_trim(st) - # psycopg2 maps None to NULL - return None if st == null_rep else st - self._mapper = mapper - else: - self._mapper = do_trim - - def _thunk(self): - ''' - Return a thunk which, when called, delivers the - UploadDataLine of the next line of the uploaded file.. - ''' - line = next(self._fileo) - return lambda: UploadDataLine(line, - self.lineno, - self._parser, - self._mapper) - - def _extend(self, line, seq): - '''Give the list as many elements as there are in the header. - - line line as string - seq line as sequence - - If there's too many elements, raise an error. - ''' - if len(seq) > self.cols: - raise TooManyColsError(self.lineno, - 'Line has too many columns', - 'More columns than column headings', - data=line) - return seq + ['' for i in range(len(seq) + 1, self.cols)] - - -class DataLineProcessor(object): - ''' - A processor supplied uploaded lines (UploadDataLine instances) - by an UploadEngine. The lines are expected to be put into a pg db. - - Abstract class not expected to be instantiated. - - Attributes: - ue UploadEngine instance - uh UploadHandler instance - cur psycopg2 cursor - - Methods: - eat(udl) Given an UploadDataLine instance put the line in the db. - ''' - def __init__(self, ue, uh): - ''' - ue UploadEngine instance - uh UploadHandler instance - ''' - super(DataLineProcessor, self).__init__() - self.ue = ue - self.uh = uh - self.cur = ue.cur - - def eat(udl): - ''' - Update the db with the content of an uploaded line - - udl An UploadDataLine instance - ''' - raise NotImplementedError - - -class NoOpProcessor(DataLineProcessor): - '''A processor that discards it's input.''' - def __init__(self, ue, uh): - ''' - ue UploadEngine instance - uh UploadHandler instance - cur psycopg2 cursor - ''' - super(NoOpProcessor, self).__init__(ue, uh) - - def eat(self, udl): - ''' - Executes an sql command in the db. - - udl A DataLine instance. - ''' - pass - - -class ExecuteSQL(DataLineProcessor): - def __init__(self, ue, uh): - ''' - ue UploadEngine instance - uh UploadHandler instance - cur psycopg2 cursor - ''' - super(ExecuteSQL, self).__init__(ue, uh) - - def eat(self, sqlc): - ''' - Executes an sql command in the db. - - sqlc An SQLCommand instance (a command and it's args) - ''' - sqlc.execute(self.cur) - - -class DBHandler(object): - ''' - Abstract class of a handler that modifies the db. - - The main logic goes here, including getting the data - into and out of the db. - - Responsible for: - Telling the form (LoadedForm) to get user data - Retrieval of data file or generation of other data to go in the db - (Generally involving an UploadData instance, and hence an UploadLine - instance.) - Validation of user supplied data; error raising: - The html form data - Whatever other data is uploaded or otherwise passed in - Pre-processing to prepare for getting data into the db - (Pre-processing is referred to as "setup" presently.) - Getting the individual bits of data into and out of the db - (by way of having a factory that produces a DataLineProcessor) - Post-processing to wrap up after the data is in the db - (Post-processing is referred to as "cleanup" presently.) - Rendering the html output - - Attributes: - request A pyramid request instance - uf An UploadForm instance - data Something (beyond what's on the form) that goes into db - ''' - def __init__(self, request): - ''' - request A pyramid request instance - ''' - super(DBHandler, self).__init__() - self.request = request - self.uf = self.make_form() - - def make_form(self): - ''' - Return an instantiation of the upload form needed - by the upload handler. - ''' - raise NotImplementedError - - def get_data(self): - ''' - Put something that will go into the db into the 'data' attribute. - ''' - raise NotImplementedError - - def val_input(self): - ''' - Validate input needed beyond that required to connect to the db. - - Note that this occurs after read() is called. - - This is expected to be replaced by it's subclass. - ''' - return [] - - def read(self): - '''Read the html form into the upload form instance''' - self.uf.read() - - def write(self, result, errors): - ''' - Return dict needed to render an html form - - Input: - errors List of Error instances - - Returns: - Dict pyramid will use to render the resulting form - Reserved keys: - errors A list of UploadError exceptions. - ''' - return self.uf.write(result, errors) - - def cleanup(self): - ''' - Called after all lines are processed to do any final - updates to the db. - ''' - pass - - def render(self, errors, response): - ''' - Return to pyramid what's needed to render the appropriate html - page. - - Input: - errors List of Error instances - response Db connection result dict - - Returns: - Dict pyramid will use to render the resulting form - ''' - response = self.write(response, errors) - return response - - -class SessionDBHandler(DBHandler): - ''' - A DBHandler that supports sessions. - - Attributes: - uf An UploadForm instance - session A pyramid session instance - ''' - def __init__(self, request): - ''' - request A pyramid request instance - ''' - super(SessionDBHandler, self).__init__(request) - self.session = self.request.session - - def write(self, result, errors): - ''' - Add csrf token into html form to protect sessions. - - Input: - errors List of Error instances - - Returns: - Dict pyramid will use to render the resulting form - Reserved keys: - errors A list of UploadError exceptions. - csrf_token Token for detecting CSRF. - ''' - response = super(SessionDBHandler, self).write(result, errors) - response['csrf_token'] = self.session.new_csrf_token() - return response - - -class UploadHandler(SessionDBHandler): - ''' - Handler for uploading a file. - - Attributes: - request A pyramid request instance - uf An UploadForm instance - data (optional) A DBData instance - ''' - def __init__(self, request): - ''' - request A pyramid request instance - ''' - super(UploadHandler, self).__init__(request) - - def factory(self, ue): - ''' - Takes an UploadEngine instance - Returns a DataLineProcessor instance - ''' - raise NotImplementedError - - def val_input(self): - ''' - Validate input needed beyond that required to connect to the db. - - Returns: - A list of Error instances - ''' - uf = self.uf - errors = super(UploadHandler, self).val_input() - - if uf['filename'] == '': - errors.append(NoFileError('No file supplied')) - - return errors - - def double_validator(self, errors): - '''Utility function that can optionally be called by - a val_input() function. It checks that the same file - has not been uploaded twice in succession to the same - db. - - `errors` - List of errors. Appended to. - ''' - uf = self.uf - if self.make_double_key() == uf['last_key']: - errors.append(DuplicateUploadError( - 'File just uploaded to this db', - ('File named ({0}) just uploaded' - .format(markupsafe.escape(uf['filename']))), - '

Try again to force upload.

')) - - def hash_sequence(self, seq): - '''Produce a hash of the elements of a sequence.''' - hash = hashlib.md5() - for item in seq: - hash.update((hash.hexdigest() + item).encode('utf-8')) - return hash.hexdigest() - - def make_double_key(self): - '''Generate unique value which identifies a double upload.''' - uf = self.uf - return self.hash_sequence((uf['db'], uf['filename'])) - - def write_double_key(self, response): - '''Utility function. Optionally called from within write() - to save a key which is later tested for to determine if - the same file is being uploaded twice in a row. - - This function keys by filename and db. - - The key is stored in the form and submitted in POST. - This way if the session times out due to inactivity - the double upload is still detected. - - Since the data is delivered to the client and the - client will likely see it anyway we use a hash not - for security but just to provide a convenient single - value that can be tested. - - Input: - response Dict used by pyramid to render html form. - - Side effects: - Modifies response. Adds 'last_key' entry used by form to store key. - ''' - response['last_key'] = self.make_double_key() - - def write(self, result, errors): - ''' - Add lines processed and number of errors into html form - shown the user. - - Input: - errors List of Error instances - - Returns: - Dict pyramid will use to render the resulting form - Reserved keys: - errors A list of UploadError exceptions. - csrf_token Token for detecting CSRF. - e_cnt Number of errors. - db_changed Boolean. Whether the db was changed. - ''' - response = super(UploadHandler, self).write(result, errors) - if hasattr(self, 'data'): - response['lines'] = self.data.lineno - 1 - response['e_cnt'] = len(errors) - response['db_changed'] = (not response['errors'] - and self.uf['action'] != '') - return response - - -class TabularFileUploadHandler(UploadHandler): - ''' - Handler for uploading a file with rows and columns and column headings. - - Attributes: - request A pyramid request instance - uf An UploadForm instance - data An UploadData instance - ''' - def __init__(self, request): - ''' - request A pyramid request instance - ''' - super(TabularFileUploadHandler, self).__init__(request) - - def cleanup(self): - '''Finish after processing all lines.''' - lines = self.ue.data.lineno - if lines == 1: - raise DataLineError( - 1, - 'File contains no data', - ('No lines found after ' - 'initial line of column headings')) - - -class DBConnector(object): - '''Abstract class supporting db connection and execution of a - processor function that alters the db content. - - Responsible for connecting to the db, transaction management, - and running the DataLineProcessor instances produced by the - DBConnector's (the UploadHandler's) factory. - Collects all errors raised during the processing. - Telling the uploadhandler to cleanup. This is done after - all upload processing and before commit or rollback - (and before the connection is closed). - - Attributes: - uh An UploadHandler instance. - cur A psycopg2 cursor instance - db Name of db to connect to - user User to connect to db - password Password to connect to db - action Upload form's hidden action variable - '' means not submitted, anything else means take action - - Methods: - run() Get a DataLineProcessor instance from the upload handler's - factory and feed it by iterating over data. - ''' - - def __init__(self, uh): - ''' - uh An UploadHandler instance - ''' - super(DBConnector, self).__init__() - - # Configuration and response management. - self.uh = uh - - def call_alter_db(self, conn): - ''' - Alter the contents of the db. - - Input: - conn A database connection - - Returns: - A list of Error instances - - Side Effects: - Alter's db - ''' - - raise NotImplementedError - - def no_connection_response(self): - ''' - The result dict to use when call_with_connection is not called. - Matches with the result dict of call_with_connection - - Returns: - Dict pyramid will use to render the resulting form. - Reserved keys: - havecreds Boolean. Supplied credentials can connect to db. - ''' - return {'havecreds': False} - - def nodberror_factory(self): - return NoDBError('No database name supplied') - - def nousererror_factory(self): - return NoUserError('No user name supplied as login credentials') - - def authfailerror_factory(self): - return AuthFailError('Unable to login', - 'Is the database, user, and password correct?') - - def dryrunerror_factory(self): - return DryRunError('Configured for "dry_run":' - ' Transaction deliberately rolled back') - - def upload_data(self, data, errors): - '''Put a DBData object into the db. - - Input: - data A DBData object. - errors A list of errors - - Side Effects: - Alters db content - Add errors to `errors`. - ''' - raise NotImplementedError - - def alter_db(self, conn): - ''' - Alter the contents of the db. - - Input: - conn A database connection - - Returns: - A list of Error instances - - Side Effects: - Alter's db - ''' - - errors = [] - try: - self.uh.get_data() - self.data = self.uh.data - self.cur = conn.cursor() - # (Cannot call uh until after self is fully - # initalized, including self.cur.) - processor = self.uh.factory(self) - except Error as ex: - errors.append(ex) - else: - try: - self.upload_data(processor, self.data, errors) - # Let upload handler finish - try: - self.uh.cleanup() - except UploadError as ex: - errors.append(ex) - finally: - self.cur.close() - return errors - - def call_with_connection(self, func): - ''' - Validate input, connect to the db, and do something with - the connection. - - func(conn) Call this function with the connection. - func(conn) must return a list of Error instances - - Returns: - (errors, response) - errors List of Error instances - response Dict pyramid will use to render the resulting form. - The dict returned by func(conn) plus reserved keys. - Reserved keys: - havecreds Boolean. Supplied credentials can - connect to db. - Side effects: - Raises errors, calls func(conn) - ''' - errors = [] - havecreds = False - response = {} - - if self.db == '': - errors.append(self.nodberror_factory()) - - if self.user == '': - errors.append(self.nousererror_factory()) - - newes = self.uh.val_input() - if newes: - if not errors: - havecreds = True - errors.extend(newes) - - if not errors: - registry = self.uh.request.registry - try: - conn = psycopg2.connect( - database=self.db, - user=self.user, - password=self.password, - host=registry.settings['pg_host'], - port=registry.settings['pg_port']) - except psycopg2.OperationalError: - errors = [self.authfailerror_factory()] - havecreds = False - else: - havecreds = True - errors = self.call_alter_db(func, conn) - - response.update({'havecreds': havecreds - }) - return (errors, response) - - def read_uh(self): - '''Read data into the upload handler.''' - self.uh.read() - - def read_db(self): - '''Read the db from the upload handler.''' - self.db = self.uh.uf['db'] - - def read_user(self): - '''Read the username from the upload handler.''' - self.user = self.uh.uf['user'] - - def read_password(self): - '''Read the password from the upload handler.''' - if 'password' in self.uh.uf: - self.password = self.uh.uf['password'] - else: - self.password = '' - - def read_action(self): - '''Read the action from the upload handler.''' - self.action = self.uh.uf['action'] - - def read(self): - '''Allows for custom engines to map what the handler - has to what they need. - ''' - self.read_uh() - self.read_db() - self.read_user() - self.read_password() - self.read_action() - - def run(self): - '''Call the alter_db function with the connection to alter the db, - render form and return Response - - Input: - - Returns: - (errors, response) - errors List of Error instantiations - response Dict containing connection result info - - Side effects: - Maintains session - (Whatever the processor() call does) - Outputs html - ''' - self.read() - - if self.action == 'u': - errors, response = self.call_with_connection(self.alter_db) - else: - errors = [] - response = self.no_connection_response() - - return self.uh.render(errors, response) - - -class NoTransactionEngine(DBConnector): - '''An engine that does not run things in a transaction. - - It also does not do CSRF checking. - - Attributes: - uh An UploadHandler instance. - data An UploadData instance of the uploaded data - cur A psycopg2 cursor instance - db Name of db to connect to - user User to connect to db - password Password to connect to db - action Upload form's hidden action variable - '' means not submitted, anything else means take action - - Methods: - run() Get a DataLineProcessor instance from the upload handler's - factory and feed it by iterating over data. - ''' - def __init__(self, uh): - ''' - uh An UploadHandler instance - ''' - super(NoTransactionEngine, self).__init__(uh) - - def call_alter_db(self, func, conn): - ''' - Call a database modification function with a db connection, - turning off the automatic wrapping of the activity in - a transaction. - - func(conn) Call this function with the connection. - func(conn) must return a list of Error instances - - Returns: - errors List of Error instances - Side effects: - Calls func(conn) - ''' - conn.set_session(autocommit=True) - errors = func(conn) - conn.close() - return errors - - def upload_data(self, processor, data, errors): - '''Put a DBData object into the db, without a wrapping transaction. - - Input: - processor A DataLineProcessor object. - data A DBData object. - errors A list of errors - - Side Effects: - Alters db content - Add errors to `errors`. - ''' - for thunk in data: - try: - udl = thunk() - except DataLineError as ex: - errors.append(ex) - else: - try: - processor.eat(udl) - except psycopg2.DatabaseError as ex: - errors.append(DBDataLineError(udl, ex)) - except DataLineError as ex: - errors.append(ex) - except DBError as ex: - errors.append(ex) - - -class UnsafeUploadEngine(DBConnector): - '''Abstract class supporting repeatedly running a processer to put - uploaded data into db. - - Attributes: - uh An UploadHandler instance. - data An UploadData instance of the uploaded data - cur A psycopg2 cursor instance - db Name of db to connect to - user User to connect to db - password Password to connect to db - action Upload form's hidden action variable - '' means not submitted, anything else means take action - - Methods: - run() Get a DataLineProcessor instance from the upload handler's - factory and feed it by iterating over data. - eat_old_line(udl, thunk) - Trap errors raised by the db while running thunk. - Report any errors as due to the udl UploadDataLine - instance. - ''' - def __init__(self, uh): - ''' - uh An UploadHandler instance - ''' - super(UnsafeUploadEngine, self).__init__(uh) - - def call_alter_db(self, func, conn): - ''' - Call a database modification function with a connection. - - func(conn) Call this function with the connection. - func(conn) must return a list of Error instances - - Returns: - errors List of Error instances - Side effects: - Calls func(conn) - ''' - errors = func(conn) - - if errors: - conn.rollback() - else: - if ast.literal_eval(self.uh.request.registry.settings['dry_run']): - conn.rollback() - errors.append(self.dryrunerror_factory()) - else: - try: - conn.commit() - except psycopg2.DatabaseError as ex: - errors.append(DBCommitError(ex)) - conn.close() - return errors - - def eat_old_line(self, udl, thunk): - ''' - Run thunk and report any errors raised by the db as due to - the UploadDataLine instance udl. - - Returns: - The result of running thunk. - ''' - try: - result = thunk() - except psycopg2.DatabaseError as ex: - raise DBDataLineError(udl, ex) - else: - return result - - def upload_data(self, processor, data, errors): - '''Put a DBData object into the db. - - Input: - processor A DataLineProcessor object. - data A DBData object. - errors A list of errors - - Side Effects: - Alters db content - Add errors to `errors`. - ''' - for thunk in data: - try: - udl = thunk() - except DataLineError as ex: - errors.append(ex) - else: - self.cur.execute( - 'SAVEPOINT line_savepoint;') - try: - processor.eat(udl) - except psycopg2.DatabaseError as ex: - self.cur.execute( - 'ROLLBACK TO line_savepoint;') - errors.append(DBDataLineError(udl, ex)) - except DataLineError as ex: - self.cur.execute( - 'ROLLBACK TO line_savepoint;') - errors.append(ex) - except DBError as ex: - self.cur.execute( - 'ROLLBACK TO line_savepoint;') - errors.append(ex) - else: - self.cur.execute( - 'RELEASE line_savepoint;') - - -class UploadEngine(UnsafeUploadEngine): - '''Abstract class supporting repeatedly running a processor - to alter db content. Adds cross-site request forgery checking - to the UnsafeUploadEngine class. - - Attributes: - uh An UploadHandler instance. - cur A psycopg2 cursor instance - db Name of db to connect to - user User to connect to db - password Password to connect to db - action Upload form's hidden action variable - '' means not submitted, anything else means take action - csrf_token The client's csrf_token - - Methods: - run() Get a DataLineProcessor instance from the upload handler's - factory and feed it by iterating over data. - ''' - - def __init__(self, uh): - ''' - uh An UploadHandler instance - ''' - super(UploadEngine, self).__init__(uh) - - def csrferror_factory(self): - return CSRFError( - 'Your request failed and you are now logged out', - ('This is a security measure. ' - 'Some possible causes are:'), - ('
    ' - '
  • Cookies are off in the browser
  • ' - '
  • Reload of an expired page
  • ' - '
      ' - '
    • Use of the Back button to (re)submit' - ' an old page
    • ' - '
    • An attacker has crafted a page which resubmits' - ' one of your old page submissions
    • ' - '
    ' - '
  • Server restart
  • ' - '
  • Login session expired due to inactivity
  • ' - '
')) - - def read_csrf_token(self): - '''Save the csrf token.''' - post = self.uh.request.POST - if 'csrf_token' in post: - self.csrf_token = post['csrf_token'] - else: - self.csrf_token = None - - def read(self): - '''Add a csrf_token.''' - super(UploadEngine, self).read() - self.read_csrf_token() - - def call_with_connection(self, func): - ''' - Connect to the db. - - func(conn) Call this function with the connection. - f(conn) must return a (errors, dict) tuple result, - errors list of Error instances - dict other results - - Returns: - (errors, response) - errors List of Error instances - response Dict pyramid will use to render the resulting form. - The dict returned by func(conn) plus reserved keys. - Reserved keys: - havecreds Boolean. Supplied credentials can - connect to db. - session_expired Boolean. Session expired. - Side effects: - Raises errors, calls func(conn) - ''' - session = self.uh.session - - token = session.get_csrf_token() - if token != self.csrf_token: - session.invalidate() - response = self.no_connection_response() - response['session_expired'] = True - return([self.csrferror_factory()], - response) - else: - (errors, response) = (super(UploadEngine, self) - .call_with_connection(func)) - response['session_expired'] = False - return (errors, response) diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..4ac6edb --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,30 @@ +# Copyright (C) 2018 The Meme Factory, Inc. http://www.meme.com/ + +# This file is part of PGWUI_Core. +# +# This program is free software: you can redistribute it and/or +# modify it under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public +# License along with this program. If not, see +# . +# + +# Karl O. Pinc + +from pgwui_core import core +from pgwui_core import form_constants + + +def test_userinitialpost_init(): + uip = core.UserInitialPost() + assert uip.db == form_constants.LIVE_DB + assert uip.user == '' + assert uip.password == '' diff --git a/tests/test_pgwui_core.py b/tests/test_pgwui_core.py deleted file mode 100644 index db7506a..0000000 --- a/tests/test_pgwui_core.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2018 The Meme Factory, Inc. http://www.meme.com/ - -# This file is part of PGWUI_Core. -# -# This program is free software: you can redistribute it and/or -# modify it under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of -# the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public -# License along with this program. If not, see -# . -# - -# Karl O. Pinc - -from pgwui_core import pgwui_core -from pgwui_core import form_constants - - -def test_userinitialpost_init(): - uip = pgwui_core.UserInitialPost() - assert uip.db == form_constants.LIVE_DB - assert uip.user == '' - assert uip.password == ''