"""Convert the plain-text ``OPCODES.LST`` reference into one HTML page.

The input is split into segments at every line that starts with a run of
dashes.  The first segment is treated as the file header, the second as the
description, and each remaining segment as an opcode entry, an appendix, or
an untyped section, depending on what its title line (the line right after
the dashed rule) looks like.

NOTE(review): the HTML tags inside the string literals of this script were
not recoverable from the source as received (only their text content
survived).  The markup below is a minimal reconstruction around that text;
confirm it against the intended page layout.
"""
import html
import re

# Written verbatim before / after the generated page content.
preamble = '''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>OPCODES.LST</title>
</head>
<body>
'''

postamble = '''
</body>
</html>
'''

REGEX_APPENDIX = re.compile(r'APPENDIX\s+([^\s]+)(?:\s+-\s+)?\s*(.*)?')
REGEX_OPCODE = re.compile(r'(?:OPCODE\s+)?(.*?)\s+-?\s+(.*)')

# A line starting with this marks the beginning of a new segment.
SEGMENT_RULE = '------------------'


def split_segments(source):
    """Split *source* into segments, each a list of lines.

    A new segment begins at every line that starts with ``SEGMENT_RULE``;
    that rule line is kept as the first line of the segment it opens.  Lines
    before the first rule form their own leading segment.  Both CRLF and
    LF line endings are accepted.
    """
    segments = []
    current = []
    # Not str.splitlines(): it would also break on form feeds and on U+0085,
    # which a latin_1 decode produces for byte 0x85.
    for line in re.split(r'\r?\n', source):
        if line.startswith(SEGMENT_RULE) and current:
            segments.append(current)
            current = []
        current.append(line)
    if current:
        segments.append(current)
    return segments


def parse_segments(segments):
    """Classify segments by their title line (the second line of each).

    Returns a list of ``(kind, ident, title, body)`` tuples where *kind* is
    ``'APPX'``, ``'OP'`` or ``None``.  Segments with fewer than two lines are
    dropped.  Untyped segments get a generated ``__id<N>`` identifier and use
    the whole title line as their title.  *body* is every line after the
    title line.
    """
    parsed = []
    untyped_count = 0
    for seg in segments:
        if len(seg) < 2:
            continue
        title_line, body = seg[1], seg[2:]

        # Appendix is tested first: REGEX_OPCODE would match those lines too.
        found = REGEX_APPENDIX.match(title_line)
        if found:
            parsed.append(('APPX', found.group(1), found.group(2), body))
            continue

        found = REGEX_OPCODE.match(title_line)
        if found:
            parsed.append(('OP', found.group(1), found.group(2), body))
            continue

        parsed.append((None, f'__id{untyped_count}', title_line, body))
        untyped_count += 1
    return parsed


def build_alpha_index(parsed_segments):
    """Group opcode names by their first character, in document order.

    Opcode entries whose name is empty (REGEX_OPCODE allows that when the
    title line starts with whitespace) have no first character and are left
    out of the index instead of raising ``IndexError``.
    """
    index = {}
    for kind, name, _title, _body in parsed_segments:
        if kind != 'OP' or not name:
            continue
        index.setdefault(name[0], []).append(name)
    return index


def anchor_id(kind, ident):
    """Return an ``id``/fragment value for an opcode or appendix heading.

    Every run of characters outside ``[0-9A-Za-z_.-]`` is replaced by ``_``
    so the value contains no whitespace or quote characters.
    """
    prefix = 'appx-' if kind == 'APPX' else 'op-'
    return prefix + re.sub(r'[^0-9A-Za-z_.-]+', '_', ident)


def render_html(header, description, parsed_segments):
    """Build the complete HTML page and return it as one string.

    *header* and *description* are lists of lines; *parsed_segments* is the
    output of ``parse_segments``.  All text taken from the input file is
    HTML-escaped (``&``, ``<`` and ``>``) before being placed in the page.
    """
    def text(value):
        return html.escape(value, quote=False)

    out = [preamble]

    # Header.
    out.append('\n<div id="top">\n')
    for line in header:
        out.append(f'\n<p>{text(line)}</p>\n')
    out.append('\n</div>\n')

    # Description, kept preformatted.
    out.append('\n<pre>\n')
    for line in description:
        out.append(text(line))
        out.append('\n')
    out.append('\n</pre>\n')

    # Alphabetical table of contents for opcodes.
    alpha_index = build_alpha_index(parsed_segments)
    out.append('\n<div id="toc">\n<h2>Table of Contents</h2>\n\n')
    for letter in sorted(alpha_index):
        out.append(f'<p><b>{text(letter)}</b>  ')
        for name in alpha_index[letter]:
            target = anchor_id('OP', name)
            out.append(f'<a href="#{target}">{text(name)}</a>   ')
        out.append('</p>\n')
    out.append('\n</div>\n')

    # Table of contents for appendices.
    out.append('\n<div>\n<h2>Appendix</h2>\n')
    for kind, ident, title, _body in parsed_segments:
        if kind != 'APPX':
            continue
        target = anchor_id('APPX', ident)
        out.append(
            f'<a href="#{target}">APPENDIX {text(ident)}: {text(title)}</a>'
            '<br>\n'
        )
    out.append('\n</div>\n')

    # Body: one preformatted section per parsed segment.
    out.append('\n<div>\n')
    for kind, ident, title, body in parsed_segments:
        # Untyped segments (kind None) get no heading, only their body.
        if kind == 'OP':
            target = anchor_id('OP', ident)
            out.append(
                f'\n<h3 id="{target}">{text(ident)} - {text(title)}</h3>'
            )
        elif kind == 'APPX':
            target = anchor_id('APPX', ident)
            out.append(
                f'\n<h3 id="{target}">'
                f'APPENDIX {text(ident)} - {text(title)}</h3>'
            )
        out.append('\n<pre>\n')
        for line in body:
            out.append(text(line))
            out.append('\n')
        out.append('\n</pre>\n')
        out.append('\n<a href="#top">Top</a>\n')
    out.append('\n</div>\n')

    # Footer navigation.
    # TODO(review): the original target of the "Home" link is unknown.
    out.append(
        '\n<div>\n'
        '<a href="./">Home</a> TOC: <a href="#toc">by Order</a> '
        '<a href="#top">Top</a>\n'
        '</div>'
    )
    out.append(postamble)
    return ''.join(out)


def main(source_path='OPCODES.LST', output_path='OPCODES.LST.HTML'):
    """Read *source_path*, convert it, and write the page to *output_path*."""
    with open(source_path, 'rb') as f:
        source = f.read().decode('latin_1')

    segments = split_segments(source)
    # Tolerate inputs with fewer than two segments instead of raising.
    header = segments[0] if segments else []
    description = segments[1] if len(segments) > 1 else []
    parsed_segments = parse_segments(segments[2:])

    # Explicit encoding: the platform default may be unable to encode every
    # character produced by the latin_1 decode.  Matches the <meta charset>
    # declared in ``preamble``.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(render_html(header, description, parsed_segments))


if __name__ == '__main__':
    main()