import html
import re
# Static markup written before and after the generated content.  The document
# skeleton (doctype, <html>, <head>/<title>, <body>) lives here so the writer
# only has to emit body markup between the two.
preamble = '''<!DOCTYPE html>
<html>
<head>
<title>OPCODES.LST</title>
</head>
<body>
'''
postamble = '''</body>
</html>
'''

# Title line of an appendix: "APPENDIX <id>", optionally followed by " - " and
# a title.  Group 1 is the id (a run of non-whitespace), group 2 the title.
REGEX_APPENDIX = re.compile(r'APPENDIX\s+([^\s]+)(?:\s+-\s+)?\s*(.*)?')

# Title line of an opcode: optional "OPCODE " prefix, the name (captured
# lazily, so it stops at the first separator), whitespace with an optional
# '-' in the middle, then the description.  Group 1 is the name and may be
# empty when the line starts with whitespace; group 2 is the description.
REGEX_OPCODE = re.compile(r'(?:OPCODE\s+)?(.*?)\s+-?\s+(.*)')
# Read the raw listing.  The bytes are decoded as latin_1, which maps every
# byte value to a code point, so decoding cannot fail on 8-bit input.
with open('OPCODES.LST', 'rb') as f:
    source = f.read().decode('latin_1')

# Split the listing into segments.  A line that starts with a run of dashes
# opens a new segment and is kept as that segment's first line; any lines
# before the first such separator form a leading segment of their own.
segments = []
current_segment = []
# Normalise CRLF to LF before splitting so the listing is segmented the same
# way whether or not its DOS line endings were preserved.  (Splitting on
# '\r\n' alone turns an LF-only copy into a single giant "line".)
for line in source.replace('\r\n', '\n').split('\n'):
    if line.startswith('------------------'):
        # Close the segment collected so far, if it has any lines.
        if current_segment:
            segments.append(current_segment)
        current_segment = []
    current_segment.append(line)
# Flush the final segment, which has no separator after it.
if current_segment:
    segments.append(current_segment)
current_segment = []

# The first two segments are the file header and the general description;
# everything after them is classified as opcode / appendix / other later on.
header = segments[0]
description = segments[1]
segments = segments[2:]
# Classify every remaining segment by its title line (the line following the
# segment's first line) and store it as a (kind, key, title, body_lines)
# tuple:
#   ('APPX', appendix id, appendix title, body)   - title matches REGEX_APPENDIX
#   ('OP',   opcode name, description,    body)   - title matches REGEX_OPCODE
#   (None,   '__id<N>',   raw title line, body)   - anything else, N counts up
parsed_segments = []
cnt = 0
for seg in segments:
    # A segment with fewer than two lines has no title line to classify.
    if len(seg) < 2:
        continue
    title_line = seg[1]
    body_lines = seg[2:]

    # The appendix pattern is tried first; the opcode pattern is only
    # consulted when the appendix pattern does not match.
    appendix_match = REGEX_APPENDIX.match(title_line)
    opcode_match = None if appendix_match else REGEX_OPCODE.match(title_line)

    if appendix_match:
        appendix_id, appendix_title = appendix_match.groups()
        parsed_segments.append(('APPX', appendix_id, appendix_title, body_lines))
    elif opcode_match:
        opcode_name, opcode_title = opcode_match.groups()
        parsed_segments.append(('OP', opcode_name, opcode_title, body_lines))
    else:
        # Unrecognised title: keep the raw line and give it a synthetic key.
        parsed_segments.append((None, f'__id{cnt}', title_line, body_lines))
        cnt += 1
# collect toc.
# Maps the first character of each opcode name to the list of opcode names
# starting with that character, in the order they appear in the listing.
alpha_idx = {}
for seg in parsed_segments:
    kind, name = seg[0], seg[1]
    if kind != 'OP':
        continue
    # REGEX_OPCODE can capture an empty name (title line starting with
    # whitespace).  Such an entry has no first character to file it under,
    # so it is left out of the index instead of raising IndexError.
    if not name:
        continue
    alpha_idx.setdefault(name[0], []).append(name)
def _anchor(kind, key):
    """Return the HTML id used for the entry *key* of type *kind*.

    Runs of characters other than ASCII letters, digits, '_', '.' and '-'
    are replaced by a single '-' so the result is usable both as an ``id``
    attribute and as a URL fragment.  The prefix keeps an opcode and an
    appendix that share a key from colliding.
    """
    prefix = 'appx-' if kind == 'APPX' else 'op-'
    return prefix + re.sub(r'[^0-9A-Za-z_.-]+', '-', key)


# Render the parsed listing as one HTML page: header, description, the two
# tables of contents, then one section per parsed segment.
#
# NOTE(review): the tags emitted below are a reconstruction.  The previous
# string literals were unterminated and carried no markup, so only the page
# structure (what is written, in which order) could be kept; confirm the tag
# choices against the intended page design.
#
# Output is pure ASCII: any other character is written as a numeric character
# reference (errors='xmlcharrefreplace'), so writing cannot fail on the
# platform's default encoding and the page does not depend on a charset
# declaration.
with open('OPCODES.LST.HTML', 'w', encoding='ascii',
          errors='xmlcharrefreplace') as f:
    f.write(preamble)

    # write header.  The id is the target of the per-section "Top" links.
    f.write('<header id="top">\n')
    for line in header:
        f.write(f'<h1>{html.escape(line)}</h1>\n')
    f.write('</header>\n')

    # write description.  Listing text is preformatted, so keep it in <pre>
    # and escape it so '<', '>' and '&' in the text are not read as markup.
    f.write('<pre>\n')
    f.writelines(f'{html.escape(line, quote=False)}\n' for line in description)
    f.write('</pre>\n')

    # write toc: one list item per initial character, linking every opcode.
    f.write('<nav>\n<h2>Table of Contents</h2>\n<ul>\n')
    for ak in sorted(alpha_idx):
        f.write(f'<li><b>{html.escape(ak)}</b> ')
        for name in alpha_idx[ak]:
            target = _anchor('OP', name)
            f.write(f'<a href="#{target}">{html.escape(name)}</a> ')
        f.write('</li>\n')
    f.write('</ul>\n')

    # write toc for appendix.
    f.write('<h2>Appendix</h2>\n<ul>\n')
    for typ, key, title, body in parsed_segments:
        if typ != 'APPX':
            continue
        target = _anchor(typ, key)
        label = html.escape(f'APPENDIX {key}: {title}')
        f.write(f'<li><a href="#{target}">{label}</a></li>\n')
    f.write('</ul>\n')
    f.write('</nav>\n')

    # write body.
    for typ, key, title, body in parsed_segments:
        f.write('<section>\n')
        # Only opcodes and appendices get a heading; segments of any other
        # type are written as body text alone.
        if typ == 'OP':
            heading = html.escape(f'{key} - {title}')
            f.write(f'<h2 id="{_anchor(typ, key)}">{heading}</h2>')
        elif typ == 'APPX':
            heading = html.escape(f'APPENDIX {key} - {title}')
            f.write(f'<h2 id="{_anchor(typ, key)}">{heading}</h2>')
        f.write('\n<pre>\n')
        f.writelines(f'{html.escape(line, quote=False)}\n' for line in body)
        f.write('</pre>\n')
        f.write('<a href="#top">Top</a>\n')
        f.write('</section>\n')
    f.write('\n')
    f.write(postamble)