# Verbose-mode regular expression that splits text into (code, message) pairs.
#
# MUST be compiled with ``re.VERBOSE``: the layout whitespace and the inline
# ``#`` comments below are only ignored in that mode. Every comment has to end
# at a newline; if the literal is collapsed onto a single line, the first ``#``
# swallows the remainder of the pattern and compilation fails with an
# unterminated-subpattern error.
#
# Group 1: one code, or several codes joined by ``+``, optionally preceded by
#          ``(`` and optionally followed by one of ``)``, ``:`` or ``+``.
#          A single code is: optional uppercase letters, ``H``, digits,
#          optional lowercase letters.
# Group 2: the message -- the shortest run of text up to (but excluding) the
#          next code, or up to the end of the string.
#
# NOTE(review): inside the lookahead ``\d+`` may backtrack, so a code directly
# followed by punctuation (e.g. ``H302,``) can still satisfy the
# "not followed by punctuation" guard by matching only ``H30`` -- confirm
# whether that is intended.
pattern = r"""
(                               # code (capture)
    # BEGIN multicode
    (?: \( \s* )?               # maybe open paren and maybe space
    # code
    [A-Z]*H                     # prefix
    \d+                         # digits
    [a-z]*                      # suffix
    (?:                         # maybe followed by other codes,
        \s* \+ \s*              # ... plus-separated
        # code
        [A-Z]*H                 # prefix
        \d+                     # digits
        [a-z]*                  # suffix
    )*
    (?: \s* [\):+] )?           # maybe space and maybe close paren or colon or plus
    # END multicode
)
( .*? )                         # message (capture): everything ...
(?=                             # ... up to (but excluding) ...
    # ... the next code
    # BEGIN multicode
    (?: \( \s* )?               # maybe open paren and maybe space
    # code
    [A-Z]*H                     # prefix
    \d+                         # digits
    [a-z]*                      # suffix
    (?:                         # maybe followed by other codes,
        \s* \+ \s*              # ... plus-separated
        # code
        [A-Z]*H                 # prefix
        \d+                     # digits
        [a-z]*                  # suffix
    )*
    (?: \s* [\):+] )?           # maybe space and maybe close paren or colon or plus
    # END multicode
    # (but not when followed by punctuation)
    (?! [^\w\s] )
    # ... or the end
    | $
)
"""