fhem-mirror/fhem/FHEM/lib/Normalize/Text/Music_Fields.pm

package Normalize::Text::Music_Fields;	# Music_Normalize_Fields
$VERSION = '0.02';
use strict;
use Config;
#use utf8;			# Needed for 5.005...

my %tr;
my %short;

sub translate_dots ($) {
  my $a = shift;
  $a =~ s/^\s+//;
  $a =~ s/\s+$//;
  $a =~ s/\s+/ /g;
  $a =~ s/\b(\w)\.\s*/$1 /g;
  $a =~ s/(\w\.)\s*/$1 /g;
  lc $a
}

sub translate_tr ($) {
  my $a = shift;
  $a = $tr{translate_dots $a} or return;
  return $a;
}

sub strip_years ($) {		# strip dates
  my ($a) = (shift);
  my @rest;
  return $a unless $a =~ s/\s+((?:\([-\d,]+\)(\s+|$))+)$//;
  @rest = split /\s+/, $1;
  return $a, @rest;
}

sub strip_duplicate_dates {	# Remove $d[0] if it matches $d_r
  my ($d_r, @d) = @_;
  return unless @d;
  $d_r   = substr $d_r,  1, length($d_r)  - 2; # Parens
  my $dd = substr $d[0], 1, length($d[0]) - 2; # Parens
  my @dates_r = split /,|--|-(?=\d\d\d\d)/, $d_r;
  my @dates   = split /,|--|-(?=\d\d\d\d)/, $dd;
  for my $d (@dates) {
    return @d unless grep /^\Q$d\E(-|$)/, @dates_r;
  }
  return @d[1..$#d];
}

sub __split_person ($) {
  # Non-conflicting ANDs (0x438 is cyrillic "i", word is cyrillic "per")
  split /([,;:]\s+(?:\x{043f}\x{0435}\x{0440}\.\s+)?|\s+(?:[-&\x{0438}ei]|and|et)\s+|\x00)/, shift;
}

sub _translate_person ($$$);
sub _translate_person ($$$) {
  my ($self, $aa, $with_year) = (shift, shift, shift);
  my $fail = ($with_year & 2);
  $with_year &= 1;
  my $ini_a = $aa;
  $aa = $aa->[0] if ref $aa;		# [value, handler]
  $aa =~ s/\s+$//;
  load_lists() unless %tr;
  # Try early fixing:
  my $a1 = translate_tr $aa;
  return ref $ini_a ? [$a1, $ini_a->[1]] : $a1 if $a1 and $with_year;
  my ($a, @date) = strip_years($aa);
  my $tr_a = translate_tr $a;
  if (not defined $tr_a and $a =~ /(.*?)\s*,\s*(.*)/s) { # Schumann, Robert
    $tr_a = translate_tr "$2 $1";
  }
  if (not defined $tr_a) {
    return if $fail;
    my $ini = $aa;
    # Normalize "translated" to "transl."
    # echo "¯¥à¥¢®¤" | perl -wnle 'BEGIN{binmode STDIN, q(encoding(cp866))}printf qq(\\x{%04x}), ord $_ for split //'
    $aa =~ s/(\s\x{043f}\x{0435}\x{0440})\x{0435}\x{0432}\x{043e}\x{0434}\x{0435}?(\s)/$1.$2/g;
    $aa =~ s/(\s+)\x{0432}\s+(?=\x{043f}\x{0435}\x{0440}\.)/;$1/g; # v per. ==> , per.
    $aa =~ s/[,;.]\s+(\x{043f}\x{0435}\x{0440}\.)\s*/; $1 /g; # normalize space, punct
    $aa =~ s/\b(transl)ated\b/$1./g;

    my @parts = __split_person $aa;
    if (@parts <= 1) {		# At least normalize spacing:
      # Add dots after initials
      $aa =~ s/\b(\w)\s+(?=(\w))/
	       ($1 ne lc $1 and $2 ne lc $2) ? "$1." : "$1 " /eg;
      # Separate initials by spaces unless in a group of initials
      $aa =~ s/\b(\w\.)(?!$|[-\s]|\w\.)/$1 /g;
      return ref $ini_a ? [$aa, $ini_a->[1]] : $aa;
    }
    for my $i (0..$#parts) {
      next if $i % 2;		# Separator
      my $val = _translate_person($self, $parts[$i], $with_year | 2); # fail
      # Deal with cases (currently, in Russian only, after "transl.")
      if (not defined $val and $i
	  and $parts[$i-1] =~ /^;\s+\x{043f}\x{0435}\x{0440}\.\s+$/ # per
	  and $parts[$i] =~ /(.*)\x{0430}$/s) {
	$val = _translate_person($self, "$1", $with_year | 2); # fail
      }
      $val ||= _translate_person($self, $parts[$i], $with_year); # cosmetic too
      $parts[$i] = $val if defined $val;
    }
    $tr_a = join '', @parts;
    return $ini_a if $tr_a eq $ini;
    @date = ();			# Already taken into account...
  }
  my ($short, @date_r) = strip_years($tr_a); # Real date
  @date = strip_duplicate_dates($date_r[0], @date) if @date_r == 1 and @date;
  $tr_a = $short unless $with_year;
  $a = join ' ', $tr_a, @date;
  return ref $ini_a ? [$a, $ini_a->[1]] : $a;
}

sub normalize_person ($$) {
  return _translate_person(shift, shift, 1);
}

for my $field (qw(artist artist_collection)) {
  no strict 'refs';
  *{"normalize_$field"} = \&normalize_person;
}

sub short_person ($$);
sub short_person ($$) {
  my ($self, $a) = (shift, shift);
  my $ini_a = $a;
  $a = $a->[0] if ref $a;		# [value, handler]
  $a = _translate_person($self, $a, 0); # Normalize, no dates of life
  $a =~ s/\s+$//;
  ($a, my @date) = strip_years($a);
  my @parts;
  if (exists $short{$a}) {
    $a = $short{$a};
  } elsif (@parts = __split_person $a and @parts > 1) {
    for my $i (0..$#parts) {
      next if $i % 2;		# Separator
      $parts[$i] = short_person($self, $parts[$i]);
    }
    $a = join '', @parts;
  } else {
    # Drop years of life
    shift @date if @date and $date[0] =~ /^\(\d{4}-[-\d,]*\d{4,}[-\d,]*\)$/;
    # Add dots after initials
    $a =~ s/\b(\w)\s+(?=(\w))/
            ($1 ne lc $1 and $2 ne lc $2) ? "$1." : "$1 " /eg;
    # Separate initials by spaces unless in a group of initials
    $a =~ s/\b(\w\.)(?!$|[-\s]|\w\.)/$1 /g;
    my @a = split /\s+/, $a;
    # Skip shorting if there are strange non upcased parts (e.g., "-") or '()')
    my @check = @a;
    my $von = (@a > 2 and $a[-2] =~ /^[a-z]+$/);
    splice @check, $#a - 1, 1 if $von;
    # Ignore mid parts (skip if there are non upcased parts (e.g., "-") or '()')
    unless (grep lc eq $_, @check or @a <= 1 or $a =~ /\(|[,;]\s/) {
      my $i = substr($a[0], 0, 1);
      $a[0] =  "$i." if $a[0] =~ /^\w\w/ and lc($i) ne $i;
      # Keep "from" in L. van Beethoven, M. di Falla, I. von Held, J. du Pre
      @a = @a[0,($von ? -2 : ()),-1];
    }
    $a = join ' ', @a;
  }
  $a = join ' ', $a, @date;
  return ref $ini_a ? [$a, $ini_a->[1]] : $a;
}

my %comp;

sub normalize_file_lines ($$) {	# Normalizing speeds up load_composer()
  my ($self, $fn) = @_;
  open my $f, '<', $fn or die "Can't open file $fn for read";
  local $_;
  print "# normalized\n";
  while (<$f>) {
    next if /^#\s*normalized\s*$/;
    chomp;
    $_ = normalize_piece($self, $_) unless /^\s*#/;
    print "$_\n";
  }
  close $f or die "Can't close file $fn for read";
}

sub _significant ($$$) {	# Try to extract "actual name" of the piece
  my ($tbl, $l, $r) = (shift, shift, shift);
  my ($pre, $opus);
  if ($tbl->{no_opus_no}) {	# Remove year-like comment
    ($pre) = ($l =~ /^(.*\S)\s*\(\d{4}\b[^()]*\)$/s);
  } else {
    ($pre, $opus) = ($l =~ /$r/);
  }
  $pre = $l unless $pre;
  my ($significant) = ($pre =~ /^(.*?\bNo[.]?\s*\d+)/is); # Up to No. NN
  ($significant) = ($pre =~ /^(.*?);/s) unless $significant;
  ($significant) = $pre unless $significant;
  (lc $significant, $opus);
}

my $def_opus_rx = qr/\b(?:Op(?:us\b|\.)|WoO)\s*\d+[a-d]?(?:[.,;\s]\s*No\.\s*\d+(?:\.\d+)*)?/;

sub _read_composer_file ($$*$$) {
  my($self, $f, $fh, $tbl, $aka) = (shift,shift,shift,shift,shift);
  my($normalized, $l, @works, %aka, $opened);
  my $opus_rx = $tbl->{opus_rx} || $def_opus_rx;
  my $opus_pref = $tbl->{opus_prefix} || 'Op.';
  local $/ = "\n";		# allow customization
  if (defined $fh) {
    $f |= "composer's file" . (eval {' for ' . $self->name_for_field_normalization} || '');
  } else {
    open COMP, "< $f" or die "Can't read $f: $!";
    $fh = \*COMP;
    $f = "`$f'";
    $opened = 1;
  }
  while (defined ($l = <$fh>)) {
    next if $l =~ /^\s*(?:##|$)/;
    if ($l =~ /^#\s*normalized\s*$/) {
      $normalized++;	# Very significant optimization (unless mail-header)
    } elsif ($l =~ /^#\s*opus_rex\s(.*?)\s*$/) {
      $opus_rx = $tbl->{opus_rx} = qr/$1/;
    } elsif ($l =~ /^#\s*dup_opus_rex\s(.*?)\s*$/) {
      $tbl->{dup_opus_rx} = qr/$1/;
    } elsif ($l =~ /^#\s*opus_prefix\s(.*?)\s*$/) {
      $opus_pref = $tbl->{opus_prefix} = $1;
    } elsif ($l =~ /^#\s*no_opus_no\s*$/) {
      $tbl->{no_opus_no} = 1;
    } elsif ($l =~ /^#\s*opus_dup\s+(.*?)\s*$/) {
      $tbl->{dup_opus}{lc $1} = 1;
    } elsif ($l =~ /^#\s*prev_aka\s+(.*?)\s*$/) {
      $aka->{$1} = $works[-1];	# recognize also alternative names
    } elsif ($l =~ /^#\s*format\s*=\s*(line|mail-header)\s*$/) {
      $/ = ($1 eq 'line' ? "\n" : '');
    } elsif ($l =~ /^#[^#]/) {
      warn "Unrecognized line of $f: $l"
    } elsif ($l !~ /^##/) {	# Recursive call to ourselves...
      if ($normalized) {
	$l =~ s/\s*$//;		# chomp...
      } elsif ($/) {
	$l = normalize_piece($self, $l);
      } else {
	$l = normalize_piece_mail_header($self, $l, $opus_rx, $opus_pref);
      }
      push @works, $l;
    }
  }
  not $opened or close $fh or die "Error reading $f: $!";
  @works;
}

sub read_composer_file ($$;*) {
  my($self, $f, $fh) = (shift,shift,shift);
  $self = prepare_tag_object_comp($self) unless ref $self;
  _read_composer_file($self, $f, $fh,{},{});
}

my @path;
@path = ("$ENV{HOME}/.music_fields")
  if defined $ENV{HOME} and -d "$ENV{HOME}/.music_fields";
push @path, '-';
@path = split /\Q$Config{path_sep}/, $ENV{MUSIC_FIELDS_PATH}
  if defined $ENV{MUSIC_FIELDS_PATH};

sub set_path {
  @path = @_;
}

(my $myself = __PACKAGE__) =~ s,::,/,g; # 'Normalize/Text/Music_Fields.pm'
my @f = $INC{"$myself.pm"};
warn("panic: can't find myself"), @f = () unless -r $f[0];
s(\.pm$)()i or (@f=(), warn "panic: misformed myself") for @f;

sub get_path () {
  map +($_ eq '-' ? @f : $_), @path;
}

sub load_composer ($$) {
  my ($self, $c) = @_;
  eval {$c = $self->shorten_person($c)};
  my $ini = $c;
  return $comp{$ini} if exists $comp{$ini};
  $c =~ s/[^-\w]/_/g;
  $c =~ s/__/_/g;
  # XXX See Wikipedia "Opus number" for more complete logic
  $comp{$ini}{opus_rx} = $def_opus_rx;
  $comp{$ini}{opus_prefix} = 'Op.';
  my @dirs = get_path();
  my @files = grep -r $_, map "$_/$c.comp", @dirs or return 0;
  my $f = $files[0];
#  $f = $c =~ tr( ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ\x80-\x9F)
#	  ( !cLXY|S"Ca<__R~o+23'mP.,1o>...?AAAAAAACEEEEIIIIDNOOOOOx0UUUUYpbaaaaaaaceeeeiiiidnooooo:ouuuuyPy_)
#	    unless -r $f;
  #warn "file looked up is $f";
  return $comp{$ini} unless -r $f;
  my $tbl = $comp{$ini};
  my ($normalized);
  my @works = _read_composer_file($self, $f, undef, $tbl, \my %aka);
  return unless @works;
  # Piano Trio No. 8 (Arrangement of the Septet; Op. 20)); Op. 38 (1820--1823)
  # so can't m/.*?/
  my $r = qr/^(.*($tbl->{opus_rx}))/s;
  # Name "as in Wikipedia:Naming conventions (pieces of music)"
  my (%opus, %name, %dup, %dupop);
  for my $l (@works) {
    my ($significant, $opus) = _significant($tbl, $l, $r);
    if ($significant and $name{$significant}) {
      $dup{$significant}++;
      warn "Duplicate name `$significant': <$l> <$name{$significant}>"
	if $ENV{MUSIC_DEBUG_TABLE};
    }
    $name{$significant} = $l if $significant;
    $opus or next;
    $opus = lc $opus;
    if ($opus{$opus}) {
      $dupop{$opus}++;
      warn "Duplicate opus number `$opus': <$l> <$opus{$opus}>"
	unless $tbl->{dup_opus_rx} and $opus =~ /$tbl->{dup_opus_rx}/
	  or $tbl->{dup_opus}{$opus};
    }
    $opus{$opus} = $l;
  }
  delete $name{$_} for keys %dup;
  delete $opus{$_} for keys %dupop;
  for my $s (keys %aka) {
    my ($n) = _significant($tbl, $s, $r);
    warn "Duplicate and/or unnecessary A.K.A. name `$s' for <$aka{$s}>"
      if $name{$n};
    $name{$n} = $aka{$s};
    $name{"\0$s"} = "\0$n";	# put into values(), see normalize_person()
  }
  $tbl->{works} = \@works;
  $tbl->{opus} = \%opus if %opus;
  $tbl->{name} = \%name if %name;
  $tbl;
}

sub translate_signature ($$$$) { # One should be able to override this
  shift;
  join '', @_;
}
$Normalize::Text::Music_Fields::translate_signature = \&translate_signature;

my %alteration = (dur => 'major', moll => 'minor');
my %mod = (is => 'sharp', es => 'flat', s => 'flat',	# since Es means Ees
	   '#' => 'sharp', b => 'flat');

# XXXX German ==> English (nontrivial): H ==> B, His ==> B sharp, B ==> B flat
# XXXX Do not touch B (??? Check "Klavier" etc to detect German???)
my %key = (H => 'B');

sub normalize_signature ($$$$) {
  my ($self, $key, $mod, $alteration) = @_;
  $alteration ||= ($key =~ /[A-Z]/) ? ' major' : ' minor';
  $alteration = lc $alteration;
  $alteration =~ s/^-?\s*/ /;
  $alteration =~ s/(\w+)/ $alteration{$1} || $1 /e;
  $mod =~ s/^-?\s*/ / if $mod;		# E-flat, Cb
  $mod = lc $mod;
  $mod =~ s/(\w+|#)/ $mod{$1} || $1 /e;
  $key = uc $key;
  $key = $key{$key} || $key;
  &$Normalize::Text::Music_Fields::translate_signature($self,$key,$mod,$alteration);
}

my $post_opus_rex = qr/(?:[\-\/](?=\d)|(?:[,;.]?|\s)\s*(?:\bN(?:[or]|(?=\d))\.?|#|\x{2116}\.?))\s*(?=\d)/;

sub normalize_opus ($$$) {
  my ($self, $op, $no) = (shift, shift, shift);
  my $have_no = ( $op =~ s/\b(?:[,;.]?|\s)\s*(?=No\.\s*\d+)/, / );
  $no = '' unless defined $no;
  # nr12 n12 12 -12 #12 Numero_Sign 12 - but only if $op has no number already!
  $no =~ s/^$post_opus_rex/, No. / unless $have_no;
  # Now the tricky part: normalize the stuff in unknown format;
  # XXXX Now support only "B. NNN" stuff
  $op =~ s/^(\w)(\b|(?=\d))\.?\s*/\U$1. /;
  "$op$no"
}

# 1: prefix ("in" etc.), 2: letter, 3: modifier ("b" etc), 4: alteration: minor etc.
my $signature_rex = qr/(\s*(?:\bin\b|[,;.:]|^|\((?:in\s+)?(?=[-a-zA-Z#\s]+\)))\s*)([a-h])(\s*[b#]|(?:\s+|-)(?:flat|sharp)|[ie]s|(?<=e)s|)((?:(?:\s+|-)(?:major|minor|dur|moll))?)\)?(?=\s*[-;":]|$)/i;

# All these should match in
# mp3info2 -D -a beethoven -t "# 28" ""
#  (should give the same results): "wind in C" "tattoo" "WoO 20"
# "sonata in F#" "piano in F#" "op78" "Op. 10-2" "Op. 10, #2" "sonata #22" "WoO 205-1"

sub find_person ($) {
 my $self = shift;
 eval {$self->name_for_field_normalization} || eval {$self->composer}
   || $self->artist;
}

# See test_normalize_piece()
sub _normalize_piece ($$$$) {
  my ($self, $n, $improve_opus, $by_opus) = (shift, shift, shift, shift);
  my $ini_n = $n;
  $n = $n->[0] if ref $n;		# [value, handler]
  return $ini_n unless $n;
  $n =~ s/^\s+//;
  $n =~ s/\s+$//;
  return $ini_n unless $n;
  $n =~ s/\s{2,}/ /g;

  # Opus numbers
  $n =~ s/\bOp(us\s+(?=\d)|[.\s]\s*|\.?(?=\d))/Op. /gi;	# XXXX posth.???
  $n =~ s/\bN(?:[or]|(?=\d))\.?\s*(?=\d)/No. /gi; # nr12 n12
  $n =~ s/(?<!\w)[#\x{2116}]\s*(?=\d)/No. /gi;	# #12, Numero Sign 12

  my $c = find_person $self;
  my $tbl = ($c and load_composer($self, $c)) || {};
  my $opus_rx = $tbl->{opus_rx} || $def_opus_rx;

  # XXXX Is this `?' for good?
  $n =~ s/(?<=[^(.,;\s])(\s*[.,;])?\s*\b(?=$opus_rx)/; /gi
    if $improve_opus;		# punctuation before Op.

  # punctuation between Op. and No (as in Wikipedia for most expanded listings)
  # $n =~ s/\b((Op\.|WoO)\s+\d+[a-d]?)(?:[,;.]?|\s)\s*(?=No\.\s*\d+)/$1, /gi;
  $n =~ s/($opus_rx)($post_opus_rex\d+)?/ normalize_opus($self, $1, $2) /gie;

  # Tricky part: normalize "In b#"; allow just b# after punctuation too
  $n =~ s/$signature_rex/
    ((not $1 or 'i' eq substr($1,0,1)) ? '' : ' ') . "in "
     . normalize_signature($self,"$2","$3","$4")/ie;
  my $canon;
  {
    $tbl or last;
    # Convert Op. 23-3 to Op. and No
#    my ($o, $no) = ($n =~ /\b(Op\.\s+\d+[a-d]?[-\/]\d+[a-d]?)((?:[,;.]?|\s)\s*(?:No\.\s*\d+))?/);
#    $n =~ s/\b(Op\.\s+\d+[a-d]?)[-\/](\d+[a-d]?)/$1, No. $2/i
#      if $o and not $no and $o !~ /^$opus_rx$/;
    $tbl->{works} or last;
    # XXX See Wikipedia "Opus number" for more complete logic
    my ($opus) = ($n =~ /^.*($opus_rx)/); # at the end (one not in comments!)
    if ($opus and $by_opus) {
      $canon = $tbl->{opus}{lc $opus} or last;
    } else { # $significant: Up to the first "No. NNN.N", or to the first ";"
      my ($significant, $pre, $no, $post) =
	($n =~ /^((.*?)\bNo\b[.]?\s*(\d+(?:\.\d+)*))\s*(.*)/is);
      ($significant) = ($n =~ /^(.*?);/s) unless $significant;
      $significant ||= $n;
      $canon = $tbl->{name}{lc $significant}; # Try exact match
      if (not $canon) {	# Try harder: match word-for-word
	my ($ton, $rx_pre, $rx_post) = ('') x 3;
	my $nn = $n;
	if ($nn =~ s/\b(in\s+[A-H](?:\s+(?:flat|sharp))?\s+(?:minor|major))\b//) {
	  $ton = $1;
	  ($significant, $pre, $no, $post) = # Redo with $nn
	    ($nn =~ /^((.*?)\bNo\b[.]?\s*(\d+(?:\.\d+)*))\s*(.*)/is);
	  ($significant) = ($nn =~ /^(.*?);/s) unless $significant;
	  $significant ||= $nn;
	  $ton = '.*\b' . (quotemeta $ton) . '\b';
	}
	$pre = $significant unless defined $pre;	# Same with No removed
	# my @parts2 = split '\W+', $post;
	if ($pre and $pre =~ /\w/) {
	  $rx_pre = '\b' . join('\b.*\b', split /\W+/, $pre) . '\b';
	}
	if ($post and $post =~ /\w/) {
	  $rx_post = '.*' . join '\b.*\b', split /\W+/, $post;
	}
	# warn "<$no> <$n> <$nn> <$ton> <$rx_pre> <$rx_post>";
	$no = '.*\bNo\.\s*' . (quotemeta $no) . '\b(?!\.\d)' if $no;
	$no = '' unless defined $no;
	last unless "$rx_pre$no$ton$rx_post";
	my $sep = $tbl->{no_opus_no} ? '' : '.*;';
	my $rx = qr/$rx_pre$no$ton$rx_post$sep/is;
	my @matches = grep /$rx/, values %{$tbl->{name}};
	if (@matches == 1) {
	  $canon = $matches[0];
	} elsif (!@matches) {
	  last;
	} else { # Many matches; maybe the shortest is substr of the rest?
	  my ($l, $s, $diff) = 1e100;
	  $l > length and ($s = $_, $l = length) for @matches;
	  $s eq substr $_, 0, $l or ($diff = 1, last) for @matches;
	  last if $diff;
	  $canon = $s;
	}
	$canon = $tbl->{name}{$canon} if $canon =~ s/^\0//s; # short name
      }
    }
#    if ($canon) {
#      my (%w, %w1);
#      for my $w (split /[-.,;\s]+/, $canon) {
#	$w{lc $w}++;
#      }
#      for my $w (split /[-.,;\s]+/, $n) {
#	$w1{lc $w}++ unless $w{lc $w};
#      }
#      if (%w1) {
#	warn "Unknown words in title: `", join("` '", sort keys %w1), "'"
#	  unless $ENV{MUSIC_TRANSLATE_FIELDS_SKIP_WARNINGS};
#	last
#      }
#    }
    $n = $canon;	# XXXX Simple try (need to compare word-for-word)
  }
  return ref $ini_n ? [$n, $ini_n->[1]] : $n;
}

sub normalize_piece ($$) {
  _normalize_piece(shift, shift, 'improve opus', 'by opus');
}

sub opus_parser ($) {
  my $tag = shift;
  my $c = find_person $tag;
  my $tbl = ($c and load_composer($tag, $c));
  my $opus_rx = $tbl->{opus_rx} || $def_opus_rx;
  my $opus_pre = $tbl->{opus_prefix} || 'Op.';
  ($opus_rx, $opus_pre, $c)
}

sub full_opus ($$;$$) {
  my ($tag, $short, $opus_rx, $opus_pref) = (shift, shift, shift, shift);
  ($opus_rx, $opus_pref) = opus_parser($tag) unless $opus_rx;

  $short = "$opus_pref $short" if $short =~ /^\d/ and not $short =~ /$opus_rx/;
  $short =~ s/^($opus_rx)($post_opus_rex\d+)?/ normalize_opus($tag, $1, $2) /gie;
  $short
}

# Currently used Title-* fields: RAW, Opus, Dates, Key, Name, Related-Name,
# Alternative-Name, Punct, Type, Count, For, Type-After-Name, In-Movements
# Related-On, Comment, Related-After, Name-By-First-Row
## [When new added, change also the "merging" logic in merge_info().]
sub normalize_mail_header_line ($$;$$) {
  my ($tag, $in, $opus_rx, $opus_pref) = (shift, shift, shift, shift);
  my ($t, $v) = $in =~ /^([-\w]+):\s*(.*)$/s or die;
  $v = "($v)" if $t eq 'Title-Dates';
  $v = full_opus $tag, $v, $opus_rx, $opus_pref
    if $t eq 'Title-Opus' and $v =~ /(^\d|[\-\/])/;
  $v = "; $v" if $t eq 'Title-Opus';
  $v = qq("$v") if $t =~ /^Title(-Related)?-Name$/;
  $v = qq(["$v"]) if $t =~ /^Title-Name-By-First-Row$/;
  $v = qq(; "$v") if $t eq 'Title-Alternative-Name';
  $v =~ s/^(in\s+)?/in /i if $t =~ 'Title-Key';
  $v = "No. $v" if $t eq 'Title-No';
  $v = "for $v" if $t eq 'Title-For';
  $v = "on $v" if $t eq 'Title-Related-On';
  $v = "(lyrics by $v)" if $t eq 'Title-Lyrics-By';
  $v = ", $v" if $t eq 'Title-Type-After-Name';
  $v;
}

## perl -wple "BEGIN {print q(# format = mail-header)} s/#\s*normalized\s*$//; $_ = qq(Title: $_) unless /^\s*(#|$)/; $_ = qq(\n$_) if $p and not /^##/; $_ .= qq(\n) unless $p = /^##/" Normalize::Text::Music_Fields-G_Gershwin.comp >Music_Fields-G_Gershwin.comp-mail
sub normalize_piece_mail_header ($$;$$) {
  my ($tag, $in, $opus_rx, $opus_pref) = (shift, shift, shift, shift);
  return $1 if $in =~ /^Title:\s*(.*?)\s*$/m;
  my @pieces = map normalize_mail_header_line($tag, $_, $opus_rx, $opus_pref),
    grep /^Title-[-\w]+:\s/, split /\n/, $in;
  for my $i (1 .. @pieces - 1) {
    $pieces[$i-1] .= ' '
      unless $pieces[$i-1] =~ /[\(\[\{]$/ or $pieces[$i] =~ /^[\)\]\}.,;:?!]/;
  }
  return join '', @pieces;
}

sub shorten_opus ($$$$) {		# $mp3, $str, $pre
  my ($tag, $op, $pref, $rx) = (shift, shift, shift, shift);
  my ($out, $cut) = ($op, '');
  if ($out =~ s/^\Q$pref\E\s*(?=\d)//) {
    if ($out =~ $rx) {	# back up if shortened version causes confusion
      $out = $op;
    } else {
      $cut = $pref;
    }
  }
  my $out1 = $out;
  if ($out =~ s/(\d[a-i]?),\s+No\.\s*(?=\d)/$1-/) {
    my $o = full_opus($tag, $out, $rx, $pref);
    if ($op ne $o or $out =~ /^$rx$/) {	# check again
      $out = $out1;
      unless ($out eq $op) {			# Extra sanity check
	$o = full_opus($tag, $out, $rx, $pref);
	$out = $op unless $op eq $o;
      }
    }
  }
  $out
}

my $main_instr = join '|', qw(Piano Violin Viola Cello Horn String Wind Harp
			      Instrument Clarinet Alto);
my $for_instr = join '|', qw(Mandolin Harpsichord chorus soprano alt bass
    basses tenor mezzo-soprano \(mezzo\)soprano baritone contralto hand
    soli soloists woodwinds celesta accordion instrumentalists large small
    double violoncello clarinet oboe english french bassoon trombone organ
    flute voice orchestra military band chamber symphonic symphony electric
    percussion double-bass vibraphone pantomime instrumental ensemble tape
    timpani bells keyboard guitar triple percussionist counter-tenor alto
    counter-alto male female children's boys' mixed a capella cappella choir
    basssoli chamberorchestra metronome triangle harmonium trumpet);
my $multiplets = join '|', qw(solo duo duet trio quartet quintet sextet septet octet);
my $pieces = join '|', qw(Serenada Serenade Romance Song Notturno Aria Mass
    Allemande Chorus Allegretto Rondo Opera Fantasia Polonaise Contredanse
    Prelude Andante Cadenza Bagatelle Cantata Aria Joke Waltz Waltzes Minuet
    Ländler March Rondino Variations Equali Fugue Piece Symphony Sonata
    Concerto Sonatina Dance Mignon Fantasy Scherzo Polka Moderato Fragment
    Transcription Orchestration Suite Music Reduction Passacaglia Arrangement
    accompaniment choral score Operetta Ballet oratorio Choruses Intermezzo
    Overture Dialogue Epilogue Aphorism Monologue Gallop Interlude
    Re-orchestration Reorchestration Cycle Potpourri Nocturne Capriccio
    Mazurek Mazurka Impromptu Humoresque Ballade Ballads Gavotte Requiem
    Fanfares Motet Rhapsodies Rhapsody Intermezzi Poem Marches Theme
    Melody);

my $numb_rx = qr/one|two|three|four|five|six|seven|eight|nine/i;

my $count_rx = qr/ \d+
		 | (?:$numb_rx)(?:teen)?
		 | ten|eleven|twelve|thirteen|fifteen|eighteen
		 | (?:twenty|thirty|fourty|fifty|sixty|seventy|eighty|ninety)
		   (?: (?:\s+ | -) (?:$numb_rx) )? /ix;

#no utf8;			# `use' is needed by 5.005

my $for_rx = qr/ (?:\s+|^)
		 for
		 (?: (?:\s+|(?<=\/)) \(?
		     (?:and|or|&|vocal\s+soloist|$main_instr|$for_instr|prepared\s+piano|magnetic\s+tape|stage\s+orchestra|jazz\s+ensemble|(?:vocal\s+)?(?:$multiplets)|$count_rx|[23456789]|[12345]\d|Große Fuge)
		     (?:s|\(s\))? \)?
		     [,\/]?
		   )+
	       /ix;

my $piece_rx = qr/ (?: (?:Transcription|Orchestration|Reduction|Arrangement|Suite|Instrumentation|Re-?orchestration)
		     \s+ of
		     (?: \s+ (?: $main_instr | the | $count_rx ) )?
		     \s+ )? # Mod
		   (?:
		     (?: $main_instr | Vocal | secular | sacred
		     | Double | Triple | Easy | Trio | Symphonic )
		     \s+ )?	# Prefix
		   (?:Concerto\s+grosso | $multiplets
		   | Ecossaise?
		   | (?:[123456]-part\s+)? (?:riddle\s+)? Canon
		   | (?:sets\s+of\s+)? (?: chorale\s+preludes? | $pieces )
                     (?: s? \s* (?:\band\b|&) \s* (?:$pieces))?
		   | Incidental\s+music | electronic\s+composition
		   | chorale\s+prelude
		   | Musical\s+greetings? | choral\s+score | vocal\s+quartet
		   | (?:heroic|comic|tragic|historical)\s+opera
		   | scenic\s+composition | symphonic\s+poem ) # Main type
		   (?: s? \s+ in \s+ (?:$numb_rx) \s+ act )?
		 /ix;

#use utf8;			# needed by 5.005

my $name_rx = qr/ (?: [A-Z]\w* \.? \s+)* [A-Z][-\'\w]+ /x;

my $rel_piece_rx = # Two Pieces for Erwin Dressel's Opera "Armer Columbus"
  qr/ \b
      (?:to|from|of|a\s+fter|for|on(?:\s+motives\s+of)?)
      (?:
	\s+ (?: \s+ music \s+ to)? (?: the | $name_rx\'s ) # Erwin Dressel's
	(?: \s+ (?: (?:(?:silent|animated)\s+)? film | spectacle | comedy
	  | TV[-\s]+production | music\s+to\s+the\s+film
	  | play | (?:Chamber-?\s*)? opera | stage \s+ revue | novel))?)? \b
    /ix;


sub strip_known_from_end ($$$) {
  my ($tag, $in, $try_key, @tail) = (shift, shift, shift);
  # E.g., when the second name is based on the first line of lyrics:
  unshift @tail, "Title-Lyrics-By: $1" if $in =~ s/\s+\(lyrics\s+by\s+([^()]+)\)$//;
  unshift @tail, "Title-Alternative-Name: $4"
    while $in =~ s/^(.*?".*?".*)(\s*[.:,;])?\s+(?(2)|(?=\())(\()?"([^\"]+)"(?(3)\)|)$/$1/;

  # Too much recognized as this if ???
  while ( $in =~ s/ \s* ( $rel_piece_rx | (?!$) [.:,;]? )
		    (?: \s+
		      ( (\[)? ["\x{201E}]([^\"\x{201C}\x{201E}]+)["\x{201C}] (?(3) \] | )
                      | \(["\x{201E}]([^\"\x{201C}\x{201E}]+)["\x{201C}]\) )) $
		  //xo ) {
    if (length $1 <= 1) {
      unshift @tail, "Title-Name: $+";
    } else {
      unshift @tail, "Title-Related-Name: $+" if $2;
      unshift @tail, "Title-Related-How: $1";
    }
  }
  unshift @tail, "Title-Related-By: after $1"
    if $in =~ s/ \s* after \s+ ($name_rx) $//xo;

  unshift @tail, "Title-Related-On: $+"	# Variation and Fugue
    if $in =~ s/ ( \b variations? (?: \s+ and \s+ $piece_rx)? (?:$for_rx)? )
		 \s+ on \s+	# on a Hungarian melody
                 (an? \s+ (?: (?: $name_rx | original ) \s+)? $piece_rx
                   (?: \s+ by \s+ $name_rx)? )$/$1/xio;	# XXXX Why $+ needed?

  unshift @tail, "Title-In-Movements: $1"
    if $in =~ s/\s*(in\s+(a\s+single|$numb_rx|\d)\s+(movement|episode)s?)$//;

  unshift @tail, "Title-Key: " . normalize_signature($tag, "$2", "$3", "$4")
    if $in =~ s/\s*$signature_rex$//;
  if ($in =~ s/\s*([.,;:])?\s+No\.\s*(\d+[a-d]?(\.\d+)?)$//i) {
    unshift @tail, "Title-No: $2";
    unshift @tail, "Title-Punct: $1" if $1;
  }

  unshift @tail, "Title-Key: " . normalize_signature($tag, "$2", "$3", "$4")
    if $try_key and $in =~ s/[:;,]?\s*$signature_rex$//;

  my $f;
  ($f = $1) =~ s/^\s*for\s*//, unshift @tail, "Title-For: $f"
    if $in =~ s/($for_rx)$//io;	# XXXX: foo arranged for piano ???

  if ($in =~ s/\s*([.,;:])?\s+No.\s*(\d+[a-d]?(\.\d+)?)$//i) {	# Repeat
    unshift @tail, "Title-No: $2";
    unshift @tail, "Title-Punct: $1" if $1;
  }

  ($in, @tail);
}

sub parse_piece ($$$$$$$);	# Predeclaration for recursive call without ()
sub parse_piece ($$$$$$$) {
  my ($after_name, $at_end, $at_start, $tag, $in, $opus_pref, $opus_rx, @tail)
    = (shift, shift, shift, shift, shift, shift, shift);
  if ($at_end) {
    unshift @tail, "Title-Dates: $2"
      if $in =~ s/(.*\S)\s*\(([^()]*\b\d{4}\b[^()]*)\)$/$1/ # $1 makes greedy
	or $at_end and not $at_start and
	  $in =~ s/^()\s*\(([^()]*\b\d{4}\b[^()]*)\)$/$1/; # $1 makes greedy
    unshift @tail, "Title-Opus: " . shorten_opus($tag, "$2", $opus_pref, $opus_rx)
      while $in =~ s/(.*);\s+($opus_rx)\s*$/$1/;
    unshift @tail, "Title-Key: " . normalize_signature($tag, "$2", "$3", "$4")
      if $in =~ s/\s*$signature_rex$//;
  }
  ($in, my @r) = strip_known_from_end($tag, $in, 'look for key');
  unshift @tail, @r;

  # Now recognize comment as everything after a key (except, maybe, name)
  if ($in =~ /^(.*\S)\s*$signature_rex\s*(?:"([^\"]+)"\s*)?(?:([.,:;])\s)?(.*)$/) {
    $in = $1;
    my $k = normalize_signature($tag, "$3", "$4", "$5");
    my($n,$rest) = ($6, $8);
    if (length $rest) {{		# Localize match
      unshift @tail,
	'Title-'. ($8 =~ /^[^\s\w]$/ ? 'Punct' : 'Comment'). ": $rest";
    }}
    unshift @tail, "Title-Punct: $7" if $7;
    my $alt = ($in =~ /".*"/ ? '-Alternative' : '');
    unshift @tail, "Title$alt-Name: $n" if defined $n and length $n;
    unshift @tail, "Title-Key: $k";
  }

  # Now repeat looking for known fields
  ($in, @r) = strip_known_from_end($tag, $in, not 'look for key');
  unshift @tail, @r;

  if ($at_start) {		#  and (@tail or not $at_end)
    unshift @tail, "Title-Type: $1" if $in =~ s/^($piece_rx s?)\s*$//iox;
    unshift @tail, "Title-Count: $1" , "Title-Type: $2"
      if $in =~ s/^($count_rx)\s+( $piece_rx s?)\s*$//iox;
    unshift @tail, "Title-Count: $1"
      if $in =~ s/^($count_rx)\s*$//iox;
  }
  if (not @tail and $at_start and $at_end) {
    unshift @tail, "Title: $in";
  } elsif (not length $in) {	# Do nothing
  } elsif ($in =~ /^\s*[-,:;.()\[\]{}]\s*$/) {
    unshift @tail, "Title-Punct: $in";
  } elsif ($after_name and $in =~ /^(by|after)((\s+and)?\s+[A-Z][-\'\w]+)+\s*$/) {
    unshift @tail, "Title-Related-By: $in";
  } elsif ($after_name and $in =~ /^([-,;:])\s+($piece_rx s?)\s*$/iox) {
    unshift @tail, "Title-Type-After-Name: $2";
  } elsif ($at_start and $in =~ /^"([^\"]+)"\s*$/iox) {
    unshift @tail, "Title-Name: $1";
  } else {
    if ($at_start and $in =~ /^"([^\"]+)"[,.;:]\s*(\S.*?)\s*$/) {
      my $name = $1;		# Pretend we are at start:
      my @rest = parse_piece 'after_name', ($at_end and not @tail), 'start',
	$tag, "$2", $opus_pref, $opus_rx;
      unshift @rest, "Title-Punct: ,"
	unless $rest[0] =~ s/^Title-Type:/Title-Type-After-Name:/;
      return("Title-Name: $name", @rest, @tail)
	unless (join "\n", '', @rest) =~ /\nTitle-RAW:/;
    }
    unshift @tail, "Title-RAW: $in";
  }
  @tail;
}

my %html_esc = qw( amp & lt < gt > );

sub naive_format ($$$) { # Used to find glaring errors in conversion only
  my ($tag, $in, $opus_rx, $opus, @out) = (shift,shift,shift);
  $in =~ s/^($opus_rx)\n/$1: /;
  my @in = split /\s*\n\s*/, $in;
  if ($in[0] =~ s/^($opus_rx)[:,]\s*/Title-RAW: /) {
    ($opus = $1) =~ s/^Opus\b/Op./;
  }
  for my $l (@in) {
    if ($l =~ s/^Title-Bold:\s*//) {
      push @out, qq("$l");
    } elsif ($l =~ s/^Title-Opus:\s*//) {
      push @out, '; ' . full_opus $tag, "$l";
    } elsif ($l =~ s/^Title-Dates:\s*//) {
      push @out, "($l)";
    } elsif ($l =~ s/^X-\w[-\w]*:\s*//) { # Do nothing
    } elsif ($l =~ s/^Title-(RAW|Comment):\s*//) {
      push @out, $l if length $l;
    } else {
      warn "Naive formatting: Unknown line format `$l'"
    }
  }
  if (defined $opus) {
    my @year;
    @year = $1 if @out and $out[-1] =~ s/\s*(\([^()]*\b\d{4}\b[^()]*\))$//;
    pop @out unless @out and length $out[-1];
    push @out, "; $opus", @year;
  }
  for my $n (1..$#out) {
    $out[$n] =~ s/^(?![.,;:])/ /;
  }
  join '', @out
}

# Convert from line-format to mail-header format:
## perl -MNormalize::Text::Music_Fields -wlne   "BEGIN {$tag = Normalize::Text::Music_Fields::prepare_tag_object_comp(shift @ARGV); print q(# format = mail-header)} print Normalize::Text::Music_Fields::emit_as_mail_header($tag,$_, 0,$pre)" gershwin Music_Fields-G_Gershwin.comp-line >Music_Fields-G_Gershwin.comp-mail1
# (inverse transformation:) Dump pieces listed in mail-header format
## perl -MNormalize::Text::Music_Fields -wle "print for Normalize::Text::Music_Fields::read_composer_file(shift, shift)" gershwin Music_Fields-G_Gershwin.comp-mail > o
#
## perl -MNormalize::Text::Music_Fields -00wnle "BEGIN {$tag = Normalize::Text::Music_Fields::prepare_tag_object_comp(shift @ARGV); print q(# format = mail-header)} print Normalize::Text::Music_Fields::emit_as_mail_header($tag,$_, q(bold,xml,opus),$pre)" shostakovich  o-xslt-better >Music_Fields-D_Shostakovich.comp-mail1
## perl -MNormalize::Text::Music_Fields -wnle "BEGIN {$tag = Normalize::Text::Music_Fields::prepare_tag_object_comp(shift @ARGV); print qq(# format = mail-header\n)} print Normalize::Text::Music_Fields::emit_as_mail_header($tag,$_, q(opus), $pre)" schnittke  o-schnittke-better  >Music_Fields-A_Schnittke.comp-mail2
sub emit_as_mail_header ($$$$) { # $mp3, $str, $has_bold_parts_etc, $pre [R/W]
  my ($tag, $in, $preformatted) = (shift, shift, shift);
  $in =~ s/#\s*normalized\s*$//;
  #return "\n" if $in =~ /^\s*$/;
  my @out;
  unless ($in =~ /^\s*(#|$)/) {
    return "\n\n" if $preformatted and $in =~ /^<\?xml\b/;
    my $ini = my $ini_raw = $in;
    $in =~ s/&(amp|lt|gt);/$html_esc{$1}/g if $preformatted =~ /\bxml\b/;
    $in =~ s/&#x([\da-f]+);/chr hex $1/gei if $preformatted =~ /\bxml\b/;

    my ($opus_rx, $opus_pre) = opus_parser($tag);

    my $have_op = ($in =~ /^$opus_rx:/);
    # When $use_only_opus, all the text but Opus-No is ignored; bad for update
    my $use_only_opus = ($preformatted =~ /\bonly_by_opus\b/);
    $in = _normalize_piece($tag, $in, !$have_op, $use_only_opus)
      unless $preformatted =~ /\bbold\b/;

    $ini = naive_format($tag, $in, $opus_rx) if $preformatted =~ /\b(opus|bold)\b/;
    my @op;
    my $prefix = ($preformatted =~ /\bbold\b/ ? 'Title-RAW: ' : '');
    if ($in =~ s/^($opus_rx)(?:[:,](?:[ \t]+|(?=\n))|\n\s*)/$prefix/) {
      my $op = $1;
      my $o_pre = $opus_pre;
      $o_pre = 'Opus' if $op =~ /^Opus\b/;
      @op = "Title-Opus: " . shorten_opus($tag, $op, $o_pre, $opus_rx);
    } elsif ($preformatted =~ /\bopus\b/) {
      warn "Expected to start with `Opus NUMBER: ': <<<$in>>>";
    }
    if ($preformatted =~ /\bbold\b/) {
      my @parts = split /\s*\n\s*/, $in;
      my ($after_for, $after_name);
      for my $n (0..$#parts) {
	my $p = $parts[$n];
	$p =~ s/\s+$//;
	if ($p =~ s/^Title-Bold:\s*//) {
	  my $rel = $after_for ? '-Related' : '';
	  push @out, "Title$rel-Name: $p";
	  $after_for = 0, $after_name = 1;
	  next;
	} elsif ($p =~ /^Title-RAW:\s*$/) { # Do nothing
	  next;
	} elsif ($after_for =
		 ($n != $#parts and $parts[$n+1] =~ /^Title-Bold:\s*/
		  and $parts[$n] =~ /^Title-RAW:\s*/
		  # Title-RAW: Two Pieces for Erwin Dressel's Opera "Armer Columbus"
		  and $p =~ s/ \s* ( $rel_piece_rx \s*$ )//ixo)) {
	  my $how = $1;
	  $p =~ s/^Title-RAW:\s+//
	    or warn "Expected to start with Title-RAW: <<<$p>>>";
	  push @out,
	    parse_piece $after_name,!'end', !$n, $tag, $p, $opus_pre, $opus_rx;
	  push @out, "Title-Related-How: $how";
	} elsif ($p =~ s/^Title-Opus:\s+// ) {
	  push @out, 'Title-Opus: ' . full_opus $tag, $p, $opus_rx, $opus_pre;
	  $after_name = 0;
	} elsif ($p =~ /^(Title-(Opus|Comment|Dates)|X-Title-Opus-Alt):\s+/ ) { # Keep intact
	  push @out, $p;
	  $after_name = 0;
	} else {
	  $p =~ s/^Title-RAW:\s+// or warn "Expected to start with `Title-RAW: ': <<<$p>>>";
	  push @out, parse_piece $after_name, $n==$#parts, !$n, $tag, $p, $opus_pre, $opus_rx;
	  $after_name = 0;
	}
      }
    } else {
      @out = parse_piece 0, 'at_end', 'at_start', $tag, $in, $opus_pre, $opus_rx;
    }
    my @y;
    unshift @y, pop @out while $out[-1] =~ /^Title-Dates:\s/;
    push @out, @op, @y;
    $out[0] =~ s/^Title:/Title-RAW:/ if @out > 1; # Opus 1: foo
    $in = join "\n", @out, ($preformatted =~ /\bbold\b/ ? ('','') : ()); # \n\n

    my $res = normalize_piece_mail_header($tag, $in, $opus_rx, $opus_pre);
    warn "# Mismatch:\n# in  = $ini\n# out = $res\n#rawin= $ini_raw\n" unless $res eq $ini;
  }
  $in = "\n$in" if $in !~ /^\s*##/ and $_[0] and not $preformatted =~ /\bbold\b/;
  $in .= qq(\n) unless $preformatted =~ /\bbold\b/ or $_[0] = ($in =~ /^##/);
  $in;			# Caller appends extra \n
}

## perl -MNormalize::Text::Music_Fields -wnle "BEGIN {$tag = Normalize::Text::Music_Fields::prepare_tag_object_comp(shift @ARGV); print qq(# format = mail-header\n)} next unless s/^\s*\+\+\s*//; print Normalize::Text::Music_Fields::merge_info($tag,$_, q(opus))" brahms o-brahms-op-no1-xslt
sub merge_info ($$$;$$) {	# $update not fully implemented
  my ($tag, $in, $preformatted, $soft, $update) = (shift, shift, shift, shift, shift);
  my $parsed = emit_as_mail_header($tag, $in, $preformatted, my $pre);
  my $op_n = ($parsed =~ /^Title-Opus: (.*)/m and $1);
  die "Can't find opus number in `$in'" unless defined $op_n;
  my $op_no = full_opus $tag, $op_n;

  $parsed =~ s/^Title-Punct:\s*-\nTitle-Name:/Title-Name-By-First-Row:/;
  $soft ||= qr(^(?!));		# Never match
  warn "Opus [$op_n]: Type `$1' interpreted as Title-Name\n"
    if $op_n =~ $soft and $parsed =~ s/^Title-Type:/Title-Name:/m
      and $parsed =~ /^Title-Name:\s*(.*)/;
  warn("Too many fields in `$parsed', skipping"), return ''
    if $parsed =~ /^(?=.)(?!Title-(?:Opus|RAW|Name(?:-By-First-Row)?|Key|Dates):)/m;

  my $name = normalize_piece $tag, $op_no; # expand opus+no to the full name

  if ($name eq $op_no) {	# No current information
    my ($opus_rx, $opus_pre) = opus_parser($tag);
    die "No subopus number in `$op_no' (from `$in')"
      unless $op_no =~ /^($opus_rx)\s*[.,:;]\s*No/;
    my $op = $1;
    $name = normalize_piece $tag, $op; # Expands opus to the full name
    $update = 0;
  } elsif (not $update) {
    die "Opus `$op_no' already known: `$name'";
  }

  my $parsed_op = emit_as_mail_header($tag, $name, 'only_by_opus', my $pre1);
  warn("Prior knowledge not found for `$in'\n"),
    return $parsed if $parsed_op =~ /^Title:/; # Not found, or not parsable

  unless ($update) {		# Handling "a group name"
    $parsed_op =~ s/^Title-Count:.*\n//; # Four ballades for piano
    if ($parsed_op =~ /^Title-Type:\s*(.*)\n/) { # Strip the plural
      my $type = $1;
      $type =~ s/^ Sets \s+ of \s+/Set of /x
	or $type =~ s/^ ($piece_rx) (?:s | es) $/$1/x; # Strip the plural
      $parsed_op =~ s/^.*/Title-Type: $type/;
    }
    $parsed_op =~ s/^Title-Opus:.*/Title-Opus: $op_n/m
      or die "Can't find Opus: `$parsed_op'";
  }
  if ($parsed =~ /^Title-Dates:\s*(.*)/m) {
    my $d = $1;			# (?<!.) does as /^/m, but matches at end too
    $parsed_op =~ s/(?<!.)(Title-Dates:.*\n|\Z)/Title-Dates: $d\n/ or die;
  }
  if ($parsed =~ /^Title-Key:\s*(.*)/m) {
    my $k = $1;
    die "Key mismatch: $k vs $1"
      if $parsed_op =~ /^Title-Key:\s*(.*)/m and $1 ne $k;
    # XXXX Where put the key?  STD orders: Type/No/Key/For or Type/For/No/Key
    # There is also (beeth) Type/For/Related-On/Key???  Type/For/Key???
    $parsed_op =~ s/(?<!.)(?=Title-(?!(?:Type|For|Related-On|No):)|\Z)/Title-Key: $k\n/ or die;
  }
  if ($parsed =~ s/^Title-RAW:/Title-Name:/m) {
    (my $n) = ($parsed =~ /^Title-Name:\s*(.*)/m);
    warn "Title-RAW `$n' interpreted as Title-Name in `$in'\n";
  }
  if ($parsed =~ /^(Title-Name(?:[-\w]*):\s*.*)/m) { # pre: Type-After-Name, In-Movements
    my $n = $1;			# Related-On, Comment, Related-After
    $parsed_op =~ s/(?<!.)(?=Title-(?:Type-After-Name|In-Movements|Related-On|Comment|Related-After|Opus|Dates):|\Z)/$n\n/ or die;
  }
  $parsed_op
}

for my $field (qw(album title title_track)) {
  no strict 'refs';
  *{"normalize_$field"} = \&normalize_piece;
}

# perl -Ii:/zax/bin -MNormalize::Text::Music_Fields -wle "BEGIN{binmode $_, ':encoding(cp866)' for \*STDIN, \*STDOUT, \*STDERR}print Normalize::Text::Music_Fields->check_persons"
sub check_persons ($) {
  my $self = shift;
  my %seen;
  $seen{$_}++ for values %tr;
  for my $l (keys %seen) {
    my $s = short_person($self, $l);
    my $ll = normalize_person($self, $s);
    warn "`$l' => `$s' => `$ll'" unless $ll eq $l;
  }
  %seen = ();
  $seen{$_}++ for values %short;
  for my $s (values %seen) {
    my $l = normalize_person($self, $s);
    my $ss = short_person($self, $l);
    warn "`$s' => `$l' => `$ss'" unless $ss eq $s;
  }
}

my %aliases;

sub load_lists () {
 my @dirs = get_path();
 my @lists = map <$_/*.lst>, @dirs;
 #warn "dirs=`@dirs', lists=`@lists'\n";
 warn("panic: can't find name lists in `@dirs'"), return 0 unless @lists;

 for my $f (@lists) {
  local $/ = "\n";
  open F, "< $f" or warn("Can't open `$f' for read: $!"), next;
  my @in = <F>;
  close F or warn("Can't close `$f' for read: $!"), next;
  my $charset;
  for (@in) {
   next if /^\s*$/;
   if ( /^ \s* \# \s* (?:charset|encoding) \s* = \s* ("?) (.*?) \1 \s* $/ix) {
     $charset = $2;
     require Encode;
     next;
   }
   $_ = Encode::decode($charset, $_) if $charset; # Make empty to disable
   s/^\s+//, s/\s+$//, s/\s+/ /g;
   next if /^##/;
   if (/^ \# \s* (alias|fix|shortname_for) \s+ (.*?) \s* => \s* (.*)/x) {
     if ($1 eq 'alias') {
       $aliases{$2} = [split /\s*,\s*/, $3];
     } elsif ($1 eq 'fix') {
       my ($old, $ok) = ($2, $3);
       $tr{translate_dots $old} = $tr{translate_dots $ok} || $ok;
       #print "translating `",translate_dots $old,"' to `",translate_dots $ok,"'\n";
     } elsif ($1 eq 'shortname_for') {
       my ($long, $short) = ($2, $3);
       $tr{translate_dots $short} = $long;
       ($long) = strip_years($long);
       $short{$long} = $short;
     }
     next;
   }
   if (/^ \# \s* fix_firstname \s+ (.*\s(\S+))$/x) {
     $tr{translate_dots $1} = $tr{translate_dots $2};
     next;
   }
   if (/^ \# \s* keep \s+ (.*?) \s* $/x) {
     $tr{translate_dots $1} = $1;
     next;
   }
   if (/^ \# \s* shortname \s+ (.*?) \s* $/x) {
     my $in = $1;
     my $full = __PACKAGE__->_translate_person($in, 0);
     unless (defined $full and $full ne $in) {
       my @parts = split /\s+/, $in;
       $full = __PACKAGE__->_translate_person($parts[-1], 0);
       warn("Can't find translation for `@parts'"), next
         unless defined $full and $full ne $parts[-1];
       # Add the normalization
       my $f = __PACKAGE__->normalize_person($parts[-1]);
       $tr{translate_dots $in} = $f;
     }
     $short{$full} = $in;
     ($full) = strip_years($full);
     $short{$full} = $in;
     next;
   }
   warn("Do not understand directive: `$_'"), next if /^#/;
   #warn "Doing `$_'";
   my ($pre, $post) = /^(.*?)\s*(\(.*\))?$/;
   my @f = split ' ', $pre or warn("`$pre' won't split"), die;
   my $last = pop @f;
   my @last = $last;

 #  no utf8;			# `use' is needed by 5.005
   (my $ascii = $last) =~
         tr( ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ\x80-\x9F)
           ( !cLXY|S"Ca<__R~o+23'mP.,1o>...?AAAAAAACEEEEIIIIDNOOOOOx0UUUUYpbaaaaaaaceeeeiiiidnooooo:ouuuuyPy_);
   push @last, $ascii unless $ascii eq $last;
   my $a = $aliases{$last[0]} ? $aliases{$last[0]} : [];
   $a = [$a] unless ref $a;
   push @last, @$a;
   for my $last (@last) {
     my @comp = (@f, $last);
     $tr{"\L@comp"} ||= $_;
     $tr{lc $last} ||= $_;		# Two Bach's
     if (@f) {
       $tr{"\L$f[0] $last"} ||= $_;	# With the first of pre-names only
       my @ini = map substr($_, 0, 1), @f;
       $tr{"\L$ini[0] $last"} ||= $_;	# One initial
       $tr{"\L@ini $last"} ||= $_;	# All initials
     }
   }
  }
 }
}

#$tr{lc 'Tchaikovsky, Piotyr Ilyich'} = $tr{lc 'Tchaikovsky'};

sub prepare_tag_object_comp ($;$) {
  my ($comp, $piece) = @_;
  require MP3::Tag;
  my $tag = MP3::Tag->new_fake('settable');

  for my $elt ( qw( title track artist album comment year genre
                    title_track artist_collection person ) ) {
    no strict 'refs';
    MP3::Tag->config("translate_$elt", \&{"Normalize::Text::Music_Fields::normalize_$elt"})
      if defined &{"Normalize::Text::Music_Fields::normalize_$elt"};
    # This is needed to expand albums, since pieces file is named so...
    MP3::Tag->config("short_person", \&Normalize::Text::Music_Fields::short_person)
        if defined &Normalize::Text::Music_Fields::short_person;
  }
  $tag->config('parse_data', ['mi', $comp, '%a'], ($piece ? ['mi', $piece, '%l'] : () ));
  $tag;
}

## perl -MNormalize::Text::Music_Fields -e Normalize::Text::Music_Fields::test_normalize_piece
sub test_normalize_piece {
  for (split /\n/, <<EOS) {
beethoven # 28
beethoven wind in C
beethoven tattoo
beethoven WoO 20
beethoven sonata in F#
beethoven piano in F#
beethoven op78
beethoven Op. 10-2
beethoven Op. 10, #2
beethoven sonata #22
beethoven WoO 205-1
beethoven WoO 205, No 1
beethoven WoO 205, No. 1
beethoven WoO 205, no 1
beethoven WoO 205;#1
beethoven WoO 205, no1
beethoven WoO 205 #1
beethoven WoO 205#1
beethoven WoO 205. #1
- beethoven WoO 205,-1
- beethoven WoO 205, -1
- beethoven WoO 205 -1
- beethoven WoO 205 1
- beethoven WoO 205;1
EOS
    my $match = (s/^-\s*// ? '-' : '+');
    s/^(\w+)\s+//;
    my $tag = prepare_tag_object_comp("$1", $_);
    print "$match ", find_person($tag), " ", $tag->album, "\n";
  }
}

for my $elt ( qw( title track artist album comment year genre
		  title_track artist_collection person ) ) {
  no strict 'refs';		# backward compatibility layer:
  *{"translate_$elt"} = \&{"normalize_$elt"} if defined &{"normalize_$elt"};
}

1;

=head1 NAME

Normalize::Text::Music_Fields - normalize names of people's and (musical) works.

=head1 SYNOPSIS

   $name = $obj->Normalize::Text::Music_Fields::normalize_person($name);
   $work = $obj->Normalize::Text::Music_Fields::normalize_piece($work);
 # $obj should have methods `name_for_field_normalization', 'shorted_person'

=head1 DESCRIPTION

Databases of names and of works-per-name are taken from plain-text
files (optionally in mail-header format).  Names are stored in F<*.lst> files.
Works are stored in F<.comp> files named after the shortened name
of the composer.

The directories of these files are looked in the environment variable
C<MUSIC_FIELDS_PATH> (if defined, split the same way as C<PATH>), or in
C<$ENV{HOME}/.music_fields>, and C<-> (and C<-> is replaced by the directory
named as the module file with F<.pm> dropped).  At runtime, one can
replace the list by calling function Normalize::Text::Music_Fields::set_path()
with the list of directories as the argument.

(Since parsed files are cached, replacing the directory list should be done
as early as possible.)

Files may be managed with utility subroutines provided with the module:

 # Translate from one-per-line to mail-header format:
 perl -wple "BEGIN {print q(# format = mail-header)} s/#\s*normalized\s*$//; $_ = qq(Title: $_) unless /^\s*(#|$)/; $_ = qq(\n$_) if $p and not /^##/; $_ .= qq(\n) unless $p = /^##/" Normalize::Text::Music_Fields-G_Gershwin.comp >Music_Fields-G_Gershwin.comp-mail

 # (inverse transformation:) Dump pieces listed in mail-header format
 perl -MNormalize::Text::Music_Fields -wle "print for Normalize::Text::Music_Fields::read_composer_file(shift, shift)" gershwin Music_Fields-G_Gershwin.comp-mail > o

 # Normalize data in 1-line-per piece format
 perl -MNormalize::Text::Music_Fields -wle "Normalize::Text::Music_Fields::prepare_tag_object_comp(shift)->Normalize::Text::Music_Fields::normalize_file_lines(shift)"

 # Create a mail-header file from a semi-processed (with "bold" fields)
 # mail-header file (with xml escapes, preceded by opus number)
 perl -MNormalize::Text::Music_Fields -00wnle "BEGIN {$tag = Normalize::Text::Music_Fields::prepare_tag_object_comp(shift @ARGV); print q(# format = mail-header)} print Normalize::Text::Music_Fields::emit_as_mail_header($tag,$_, q(bold,xml,opus),$pre)" shostakovich  o-xslt-better >Music_Fields-D_Shostakovich.comp-mail1

 # Likewise, from work-per-line with opus-numbers:
 perl -MNormalize::Text::Music_Fields -wnle "BEGIN {$tag = Normalize::Text::Music_Fields::prepare_tag_object_comp(shift @ARGV); print qq(# format = mail-header\n)} print Normalize::Text::Music_Fields::emit_as_mail_header($tag,$_, q(opus), $pre)" schnittke  o-schnittke-better  >Music_Fields-A_Schnittke.comp-mail2

 # A primitive tool for merging additional info into the database:
 perl -MNormalize::Text::Music_Fields -wnle "BEGIN {$tag = Normalize::Text::Music_Fields::prepare_tag_object_comp(shift @ARGV); print qq(# format = mail-header\n)} next unless s/^\s*\+\+\s*//; print Normalize::Text::Music_Fields::merge_info($tag,$_, q(opus,xml), qr(^(58|70|76|116|118|119)($|-)))" brahms o-brahms-op-no1-xslt

 # Minimal consistency check of persons database.
 perl -MNormalize::Text::Music_Fields -wle "BEGIN{binmode $_, ':encoding(cp866)' for \*STDIN, \*STDOUT, \*STDERR} print Normalize::Text::Music_Fields->check_persons"

 # Minimal testing code:
 perl -MNormalize::Text::Music_Fields -e Normalize::Text::Music_Fields::test_normalize_piece

It may be easier to type these examples if one uses C<manage_M_N_F.pm>, which
exports the mentioned subroutines to the main namespace (available in
F<examples> directory of a distribution of C<MP3::Tag>).  E.g., the last
example becomes:

 perl -Mmanage_M_N_F -e test_normalize_piece


=cut