"""Convert a sectioned ``.lst`` text file into a single HTML page.

The input is split into segments at divider lines of the form
``--------<category char><id>...`` (see ``REGEX_HEADER``).  The first segment
supplies the page header (its first two lines) and a free-form description;
every later segment becomes one entry of the page.  The page contains two
tables of contents (in file order and grouped by category character)
followed by the entries themselves.

Usage: python lst_convert_generic.py [file.lst]

The result is written next to the input as ``<input>.HTML``.

NOTE(review): the HTML tags and anchor names in this file were reconstructed;
the copy of the script this was rebuilt from had its markup stripped.  Verify
the generated page (especially the "Home" link target) against the intended
layout.
"""
import html
import os
import re
import sys
from collections import defaultdict
from typing import Dict, List, NamedTuple, Optional, Sequence

style = '''
body { font-variant-ligatures: none; }
pre { font-family: 'Courier New', Courier, monospace; }
a { color: black; }
.bottom-nav { position: sticky; bottom: 0px; background-color: white; text-align: right; }
h3, h4 { margin: 0; }
'''

postamble = '''
</body>
</html>
'''

# Target of the "Home" link in the sticky bottom navigation bar.
# NOTE(review): assumed value -- confirm against the site this page is served from.
HOME_HREF = 'index.html'

# Human-readable names for the one-character category codes found in divider
# lines.  Codes that are missing here are listed by their bare character.
category = {
    'A': 'applications',
    'a': 'access software (screen readers, etc)',
    'B': 'BIOS',
    'b': 'vendor-specific BIOS extensions,',
    'C': 'CPU-generated',
    'c': 'caches/spoolers,',
    'D': 'DOS kernel',
    'd': 'disk I/O enhancements,',
    'E': 'DOS extenders',
    'e': 'electronic mail',
    'F': 'FAX,',
    'f': 'file manipulation',
    'G': 'debuggers/debugging tools',
    'g': 'games,',
    'H': 'hardware',
    'h': 'vendor-specific hardware,',
    'I': 'IBM workstation/terminal emulators',
    'i': 'system info/monitoring,',
    'J': 'Japanese',
    'j': 'joke programs,',
    'K': 'keyboard enhancers',
    'k': 'file/disk compression,',
    'l': 'shells/command interpreters,',
    'M': 'mouse/pointing device',
    'm': 'memory management,',
    'N': 'network',
    'n': 'non-traditional input devices,',
    'O': 'other operating systems,',
    'P': 'printer enhancements',
    'p': 'power management,',
    'Q': 'DESQview/TopView and Quarterdeck programs,',
    'R': 'remote control/file access',
    'r': 'runtime support,',
    'S': 'serial I/O',
    's': 'sound/speech,',
    'T': 'DOS-based task switchers/multitaskers',
    't': 'TSR libraries',
    'U': 'resident utilities',
    'u': 'emulators,',
    'V': 'video',
    'v': 'virus/antivirus,',
    'W': 'MS Windows,',
    'X': 'expansion bus BIOSes',
    'x': 'non-volatile config storage',
    'y': 'security',
    '*': 'reserved (and not otherwise classified)',
    '!': 'document info',
    '-': 'uncategorized',
}

# Divider line: eight dashes, one category character, then the entry id
# (padded with dashes, which are stripped after matching).
REGEX_HEADER = re.compile(r'--------(.)(.+)')
# Title line: captures the line's text, minus one leading dash if present.
REGEX_TITLE = re.compile(r'.*?-?(.*)')
# Accept both CRLF and bare LF line endings in the input.
_LINE_BREAK = re.compile(r'\r?\n')


class Segment(NamedTuple):
    """One parsed entry of the input file."""

    typ: str          # category character taken from the divider line
    ident: str        # entry id from the divider line, surrounding dashes removed
    title: str        # text captured from the first line after the divider
    body: List[str]   # every line after the divider (the title line included)
    link_id: str      # anchor id, made unique per page by a numeric suffix


def _text(value: str) -> str:
    """Escape *value* for use as HTML text content."""
    return html.escape(value, quote=False)


def _attr(value: str) -> str:
    """Escape *value* for use inside a double-quoted HTML attribute."""
    return html.escape(value, quote=True)


def _category_anchor(typ: str) -> str:
    """Return the anchor id of the per-category list for category *typ*.

    The code point is used instead of the character itself so that codes
    such as ``*`` or ``!`` still give a plain alphanumeric id.
    """
    return f'cat_{ord(typ)}'


def _category_label(typ: str) -> str:
    """Return the escaped display label for category *typ*."""
    label = typ
    if typ in category:
        label += f' - {category[typ]}'
    return _text(label)


def _build_preamble(title: str) -> str:
    """Return the document head and opening ``<body>`` tag for page *title*."""
    return f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{_text(title)}</title>
<style>{style}</style>
</head>
<body>
'''


def _split_segments(lines: Sequence[str]) -> List[List[str]]:
    """Group *lines* into segments, starting a new one at each divider line.

    Lines before the first divider form the first segment.  At least one
    segment is returned whenever *lines* is non-empty.
    """
    segments: List[List[str]] = []
    current: List[str] = []
    for line in lines:
        if REGEX_HEADER.match(line) and current:
            segments.append(current)
            current = []
        current.append(line)
    if current:
        segments.append(current)
    return segments


def _parse_segments(segments: Sequence[Sequence[str]]) -> List[Segment]:
    """Turn raw segments (divider line first) into ``Segment`` records.

    Segments with no line after the divider are skipped.  When an id occurs
    more than once, the first occurrence keeps the bare id as its anchor and
    later ones get ``<id>_0``, ``<id>_1``, ...
    """
    parsed: List[Segment] = []
    repeats: Dict[str, int] = {}
    for seg in segments:
        if len(seg) < 2:
            continue
        divider = REGEX_HEADER.match(seg[0])
        if divider is None:
            # Cannot happen for segments produced by _split_segments (other
            # than the leading one); guard so a bad caller does not crash.
            continue
        typ, ident = divider.groups()
        ident = ident.strip('-')
        if ident in repeats:
            link_id = f'{ident}_{repeats[ident]}'
            repeats[ident] += 1
        else:
            link_id = ident
            repeats[ident] = 0
        title = REGEX_TITLE.match(seg[1]).group(1).strip()
        parsed.append(Segment(typ, ident, title, list(seg[1:]), link_id))
    return parsed


def _render_page(page_title: str, header: Sequence[str],
                 description: Sequence[str],
                 entries: Sequence[Segment]) -> str:
    """Build the complete HTML document and return it as one string.

    All text taken from the input file is HTML-escaped before insertion.
    """
    toc_links = ('<a href="#toc_order">by Order</a> '
                 '<a href="#toc_category">by Category</a>')
    top_link = '\n<a href="#top">Top</a>\n'

    # Collect the per-category table of contents, preserving file order.
    by_category: Dict[str, List[Segment]] = defaultdict(list)
    for entry in entries:
        by_category[entry.typ].append(entry)

    out: List[str] = [_build_preamble(page_title)]
    emit = out.append

    # Header.
    emit('\n<a id="top"></a>\n<hr>\n')
    for line in header:
        emit(f'\n<h3>{_text(line)}</h3>\n')
    emit('\n<hr>\n')

    # Description.
    emit('\n<pre>\n')
    for line in description:
        emit(_text(line))
        emit('\n')
    emit('\n</pre>\n')
    emit(f'\n<p>Table of Contents: {toc_links}</p>\n')

    # Table of contents in file order.
    emit('\n<hr>\n<h3 id="toc_order">Table of Contents by Order</h3>\n\n')
    for entry in entries:
        emit(f'<a href="#{_attr(entry.link_id)}">'
             f'{_text(entry.ident)} - {_text(entry.title)}</a><br>\n')
    emit(top_link)

    # Table of contents by category: an index of categories, then one list
    # of entries per category.
    emit('\n<hr>\n<h3 id="toc_category">Table of Contents by Category</h3>\n\n')
    for typ in sorted(by_category):
        emit(f'<a href="#{_category_anchor(typ)}">{_category_label(typ)}</a><br>\n')
    emit('<br>\n')
    for typ in sorted(by_category):
        emit(f'<h4 id="{_category_anchor(typ)}">{_category_label(typ)}</h4>\n')
        for entry in by_category[typ]:
            emit(f'<a href="#{_attr(entry.link_id)}">'
                 f'{_text(entry.ident)} - {_text(entry.title)}</a><br>\n')
        emit(top_link)
        emit('<br>\n')
    emit('\n<hr>\n')

    # Entries.
    for entry in entries:
        emit(f'\n<h4 id="{_attr(entry.link_id)}">'
             f'{_text(entry.ident)} - {_text(entry.title)}</h4>')
        emit('\n<pre>\n')
        for line in entry.body:
            emit(_text(line))
            emit('\n')
        emit('\n</pre>\n')
        emit(top_link)
        emit('\n<hr>\n')

    emit(f'\n<div class="bottom-nav"><a href="{_attr(HOME_HREF)}">Home</a> '
         f'TOC: {toc_links} <a href="#top">Top</a></div>')
    emit(postamble)
    return ''.join(out)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Convert the file named by ``argv[1]`` and write ``<file>.HTML``.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 when the argument is missing or the file does not
        exist (a message is printed to stderr in both failure cases).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('Usage: python lst_convert_generic.py [file.lst]', file=sys.stderr)
        return 1

    input_filename = argv[1]
    if not os.path.exists(input_filename):
        print(f'Cannot find file {input_filename}.', file=sys.stderr)
        return 1

    # latin_1 maps every byte to a code point, so decoding can never fail.
    with open(input_filename, 'rb') as f:
        source = f.read().decode('latin_1')

    segments = _split_segments(_LINE_BREAK.split(source))
    leading = segments[0]
    page = _render_page(
        input_filename,
        header=leading[:2],
        description=leading[2:],
        entries=_parse_segments(segments[1:]),
    )

    # Explicit UTF-8 (matching the <meta charset>) so that non-ASCII input
    # characters cannot fail to encode under the platform default encoding.
    with open(f'{input_filename}.HTML', 'w', encoding='utf-8') as f:
        f.write(page)
    return 0


if __name__ == '__main__':
    sys.exit(main())