# Copyright (C) 2024 The Meme Factory, Inc., http://www.karlpinc.com/
#
#    This file is part of Babase.
#
#    Babase is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Babase.  If not, see <http://www.gnu.org/licenses/>.
#
# Convert encoded "characters".    Either to Unicode or to plain 7-bit ASCII.
#
#
# Karl O. Pinc <kop@karlpinc.com>
#
import sys

# Replacements used when reducing text to plain ASCII.  Keys are code
# points, as returned by ord(); values are the replacement strings.
# Code points 0xDC80-0xDCFF are the lone surrogates which stand in for
# the input bytes 0x80-0xFF that could not be decoded (stdin is read
# with errors='surrogateescape').
ASCII_MAP = {
    # Undecodable input bytes, in byte order.
    0xDC83: 'E',      # Mac OS Roman É, U+00C9
                      # LATIN CAPITAL LETTER E WITH ACUTE
    0xDC91: "'",      # Windows-1252 ‘, U+2018
                      # LEFT SINGLE QUOTATION MARK
    0xDC92: "'",      # Windows-1252 ’
                      # RIGHT SINGLE QUOTATION MARK
    0xDCA0: ' ',      # Windows-1252 <nbsp>, U+00A0
                      # NO-BREAK SPACE
    0xDCB5: 'u',      # µ
    0xDCC6: "'",      # Used as an apostrophe
    0xDCCA: ' ',      # Appears to be used in place of EOL??
                      # Replaced with a space (0x20)
    0xDCD5: "'",      # Mac OS Roman ’
                      # RIGHT SINGLE QUOTATION MARK
    0xDCE0: 'a',      # Windows-1252 à, U+00E0
                      # LATIN SMALL LETTER A WITH GRAVE

    # Characters that decoded successfully.
    0xB5: 'u',        # µ, U+00B5 MICRO SIGN
    0xD4: 'u',        # Ô  These three are parts of a sequence that,
    0xF8: '',         # ø  taken together, is replaced with a
    0x3A9: '',        # Ω  single "u"
    0x2026: '...',    # …  (ALLMISCS.Text for AlMID 10478)
    0x1F412: '',      # 🐒 Removed
}

# Replacements used when converting to Unicode (the -u option).  Keys
# are the lone surrogate code points (0xdc80-0xdcff) that stand in for
# the input bytes 0x80-0xff that could not be decoded (stdin is read
# with errors='surrogateescape'); the low byte of a key is the
# original input byte.  Values are the replacement strings.  Each
# comment names the encoding the byte is taken to be in.
UNICODE_MAP = {
    0xdc8e: chr(0x00e9), # é    U+00e9 Mac OS Roman   e    65
                         # LATIN SMALL LETTER E WITH ACUTE
    0xdc92: chr(0x2019), # ’    U+2019 Windows-1252   '    72
                         # RIGHT SINGLE QUOTATION MARK
    0xdc90: chr(0x00ea), # ê    U+00ea Mac OS Roman   e    65
                         # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xdc8b: chr(0x00e3), # ã    U+00e3 Mac OS Roman   a    16
                         # LATIN SMALL LETTER A WITH TILDE
    0xdc87: chr(0x00e1), # á    U+00e1 Mac OS Roman   a    16
                         # LATIN SMALL LETTER A WITH ACUTE
    0xdc97: chr(0x00f3), # ó    U+00f3 Mac OS Roman   o    f7
                         # LATIN SMALL LETTER O WITH ACUTE
    0xdc8d: chr(0x00e7), # ç    U+00e7 Mac OS Roman   c    36
                         # LATIN SMALL LETTER C WITH CEDILLA
    0xdcd5: chr(0x2019), # ’    U+2019 Mac OS Roman   '    72
                         # RIGHT SINGLE QUOTATION MARK
    0xdca0: chr(0x00a0), # <nbsp> U+00a0 Windows-1252 to unicode non-break
                         # NO-BREAK SPACE
    0xdc91: chr(0x2018), # ‘    U+2018 Windows-1252   '    27
                         # LEFT SINGLE QUOTATION MARK
    0xdc83: chr(0x00c9), # É    U+00c9 Mac OS Roman   E    54
                         # LATIN CAPITAL LETTER E WITH ACUTE
    0xdce0: chr(0x00e0), # à    U+00e0 Windows-1252   a    61
                         # LATIN SMALL LETTER A WITH GRAVE
    0xdcc6: "'",         # Used as an apostrophe      '    27
    0xdcca: ' ',         # Appears to be used in place of EOL??
                         # replacing with space (0x20)
    0xdcb5: chr(0x00b5), # µ    U+00b5 [both]         u    75
                         # MICRO SIGN
    0xdcc9: chr(0x2026), # …    U+2026 Mac OS Roman
                         # HORIZONTAL ELLIPSIS
    0xdcaf: '0',         # Ø    U+00D8 Mac OS Roman   0    30
                         # LATIN CAPITAL LETTER O WITH STROKE
                         # (deliberately replaced with the digit zero)
    0xdcf3: chr(0x00f3), # ó    U+00F3 Windows-1252   o    6f
                         # LATIN SMALL LETTER O WITH ACUTE
    0xdce3: chr(0x00e3), # ã    U+00E3 Windows-1252   a    61
                         # LATIN SMALL LETTER A WITH TILDE
    0xdce9: chr(0x00e9), # é    U+00E9 Windows-1252   e    65
                         # LATIN SMALL LETTER E WITH ACUTE
    0xdced: chr(0x00ed), # í    U+00ED Windows-1252   i    69
                         # LATIN SMALL LETTER I WITH ACUTE
    0xdcea: chr(0x00ea), # ê    U+00EA Windows-1252   e    65
                         # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xdce1: chr(0x00e1), # á    U+00E1 Windows-1252   a    61
                         # LATIN SMALL LETTER A WITH ACUTE
    0xdce7: chr(0x00e7), # ç    U+00E7 Windows-1252   c    63
                         # LATIN SMALL LETTER C WITH CEDILLA
    0xdcfa: chr(0x00fa), # ú    U+00FA Windows-1252   u    75
                         # LATIN SMALL LETTER U WITH ACUTE
    0xdcf4: chr(0x00f4), # ô    U+00F4 Windows-1252   o    6f
                         # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xdccd: chr(0x00cd), # Í    U+00CD Windows-1252   I    49
                         # LATIN CAPITAL LETTER I WITH ACUTE
    0xdce2: chr(0x00e2), # â    U+00E2 Windows-1252   a    61
                         # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xdcc1: chr(0x00c1), # Á    U+00C1 Windows-1252   A    65
                         # LATIN CAPITAL LETTER A WITH ACUTE
    0xdcc7: chr(0x00c7), # Ç    U+00C7 Windows-1252   C    67
                         # LATIN CAPITAL LETTER C WITH CEDILLA
    0xdc85: chr(0x2026), # …    U+2026 Windows-1252
                         # HORIZONTAL ELLIPSIS
}


def unicode_convert(char):
    """Return the Unicode replacement for char.

    A char that can be encoded (with the str.encode() defaults) is
    returned unchanged.  A char that cannot be encoded is looked up,
    by code point, in UNICODE_MAP and its replacement is returned.

    Raises KeyError when a char that cannot be encoded has no entry
    in UNICODE_MAP.
    """
    try:
        # Only the success or failure matters; the bytes are discarded.
        char.encode()
    except UnicodeEncodeError:
        replacement = UNICODE_MAP[ord(char)]
    else:
        replacement = char
    return replacement


def ascii_convert(char):
    """Return the ASCII_MAP replacement for char.

    The lookup is by code point.  A char with no entry in ASCII_MAP
    is returned unchanged.
    """
    code_point = ord(char)
    if code_point in ASCII_MAP:
        return ASCII_MAP[code_point]
    return char


# Universal newlines, so we can read a line at a time, but with EOL
# chars produced "as-is".  Input bytes that cannot be decoded are
# turned into lone surrogates (0xdc80-0xdcff) by 'surrogateescape';
# these are the code points the maps are keyed on.
sys.stdin.reconfigure(newline='', errors='surrogateescape')

# Write EOL chars "as-is" too.  Without this a '\n' written to stdout
# is translated to os.linesep, so where os.linesep is not '\n' an
# input '\r\n', preserved above, would be written as '\r\r\n'.
sys.stdout.reconfigure(newline='')

# -u converts to Unicode; the default is conversion to ASCII.
if len(sys.argv) > 1 and sys.argv[1] == '-u':
    convert_char = unicode_convert
else:
    convert_char = ascii_convert

for line in sys.stdin:
    # ASCII characters are passed through untouched; only the rest are
    # converted.  Each output line is assembled and then written once,
    # instead of making a print() call for every character.
    sys.stdout.write(''.join(
        char if char.isascii() else convert_char(char)
        for char in line))
