A simple hyphenation algorithm (syllabicates Spanish words)
Hyphenation
Sometimes we need to display or print a text, and we'd like to hyphenate long words that don't fit at the end of a line, to prevent them from falling entirely into the next line leaving too much space unused.
The main problem that arises is how to divide a word into syllables. I don't know the rules for syllabication in English, so I leave that part to you, but I hope you find the following example of Spanish syllabication useful:
procedure Syllabify(Syllables: TStringList; s: string);
// Splits the Spanish word "s" into syllables and stores them, in order, in
// "Syllables" (the list is cleared first).
//
// - A literal '-' found in the word is stored as an item of its own, so the
//   concatenation of all items always reproduces the original word.
// - Empty items are never stored: an empty word yields an empty list, and a
//   leading or trailing '-' does not produce a '' syllable next to it.
// - A single consonant left over at the end of the word is appended to the
//   preceding syllable, unless that preceding item is the '-' separator.
//
// NOTE(review): the character sets below contain accented letters, which
// assumes a single-byte (ANSI) Char. On Unicode compilers a WideChar tested
// with "in" is narrowed to a byte char (warning W1050) -- confirm the target
// compiler before relying on the accented entries.
const
  Consonants = ['b','B','c','C','d','D','f','F','g','G',
    'h','H','j','J','k','K','l','L','m','M','n','N',
    'ñ','Ñ','p','P','q','Q','r','R','s','S','t','T',
    'v','V','w','W','x','X','y','Y','z','Z'];
  StrongVowels = ['a','A','á','Á','e','E','é','É',
    'í','Í','o','ó','O','Ó','ú','Ú'];
  WeakVowels = ['i','I','u','U','ü','Ü'];
  Vowels = StrongVowels + WeakVowels;
  Letters = Vowels + Consonants;
var
  i, j, n, m, hyphen: integer;

  // Stores a syllable, silently dropping empty fragments.
  procedure AddSyllable(const Part: string);
  begin
    if Part <> '' then
      Syllables.Add(Part);
  end;

begin
  j := 2;            // Index where the syllable being built starts
  s := #0 + s + #0;  // Sentinels: s[i-1] and s[i+1] are always readable
  n := Length(s) - 1; // Index of the last real character
  i := 2;            // Index of the first real character
  Syllables.Clear;
  while i <= n do begin
    hyphen := 0; // Do not hyphenate
    if s[i] in Consonants then begin
      if s[i+1] in Vowels then begin
        // vowel + consonant + vowel: break before the consonant
        if s[i-1] in Vowels then hyphen := 1;
      end else if (s[i+1] in Consonants) and
        (s[i-1] in Vowels) then begin
        // vowel + consonant + consonant: keep the pair together when it
        // is one of the listed clusters (break before it), otherwise
        // break between the two consonants
        if s[i+1] in ['r','R'] then begin
          if s[i] in ['b','B','c','C','d','D','f','F','g',
            'G','k','K','p','P','r','R','t','T','v','V']
          then hyphen := 1 else hyphen := 2;
        end else if s[i+1] in ['l','L'] then begin
          if s[i] in ['b','B','c','C','d','D','f','F','g',
            'G','k','K','l','L','p','P','t','T','v','V']
          then hyphen := 1 else hyphen := 2;
        end else if s[i+1] in ['h', 'H'] then begin
          if s[i] in ['c', 'C', 's', 'S', 'p', 'P']
          then hyphen := 1 else hyphen := 2;
        end else
          hyphen := 2;
      end;
    end else if s[i] in StrongVowels then begin
      // Two strong vowels in a row belong to different syllables
      if (s[i-1] in StrongVowels) then hyphen := 1
    end else if s[i] = '-' then begin
      // A hyphen already present in the word closes the current
      // syllable and is stored as a separate item
      AddSyllable(Copy(s, j, i - j));
      Syllables.Add('-');
      inc(i);
      j := i;
    end;
    if hyphen = 1 then begin // Hyphenate here
      AddSyllable(Copy(s, j, i - j));
      j := i;
    end else if hyphen = 2 then begin // Hyphenate after
      inc(i);
      AddSyllable(Copy(s, j, i - j));
      j := i;
    end;
    inc(i);
  end;
  m := Syllables.Count - 1;
  if (j = n) and (m >= 0) and (Syllables[m] <> '-')
    and (s[n] in Consonants) then
    // Last letter: a lone trailing consonant joins the previous syllable
    // (but never the '-' separator item)
    Syllables[m] := Syllables[m] + s[n]
  else
    AddSyllable(Copy(s, j, n - j + 1)); // Last syllable
end;
To test the procedure you can drop a Textbox and a Label on a form and, in the Change event of the Textbox, write:
procedure TForm1.Edit1Change(Sender: TObject);
// Shows the text typed in Edit1 split into syllables, joined with hyphens,
// in Label1.
var
  Parts: TStringList;
  Joined: string;
begin
  Parts := TStringList.Create;
  try
    Syllabify(Parts, Edit1.Text);
    // The Text property returns the items separated by line breaks; trim
    // the surrounding whitespace and turn each remaining break into a hyphen
    Joined := Trim(Parts.Text);
    Label1.Caption := StringReplace(Joined, #13#10, '-', [rfReplaceAll]);
  finally
    Parts.Free;
  end;
end;
Now that we have a syllabication procedure, note that we can't hyphenate a word at just any syllable break. It is usually correct
and/or desirable to join small syllables at the left and/or right ends of a word to guarantee, for example, that there are at least two syllables on either side of the break when the word gets hyphenated, or -as in the following example- to make sure that there are at least four characters on either side:
procedure ApplyRules(Syllables: TStringList);
// Merges short syllables at both ends of the word so that the first and the
// last item of "Syllables" each hold at least MinLetters characters (or
// until a single item remains). The list is modified in place.
const
  MinLetters = 4;
var
  Last: Integer;
begin
  // Left side: absorb the second item into the first while it is too short
  while (Syllables.Count > 1) and
    (Length(Syllables[0]) < MinLetters) do begin
    Syllables[0] := Syllables[0] + Syllables[1];
    Syllables.Delete(1);
  end;
  // Right side: fold the last item into the one before it while too short
  while (Syllables.Count > 1) and
    (Length(Syllables[Syllables.Count - 1]) < MinLetters) do begin
    Last := Syllables.Count - 1;
    Syllables[Last - 1] := Syllables[Last - 1] + Syllables[Last];
    Syllables.Delete(Last);
  end;
end;
Finally, it is time to parse the text, splitting each paragraph into lines and determining which words should be hyphenated. The following example does that with a text to be displayed in a Memo:
procedure Hyphenate(Memo: TMemo; OriginalText: TStrings);
// Re-flows the paragraphs of "OriginalText" (one paragraph per item) into
// "Memo", breaking each paragraph into lines that fit the memo's client
// width. A word that does not fit at the end of a line is split at a
// syllable boundary (see Syllabify / ApplyRules) and a '-' is appended to
// the line. The previous contents of the memo are discarded.
//
// A word that is wider than the memo and cannot be split so that any part
// fits is placed on a line of its own (it overflows) so the procedure
// always makes progress.
var
  paragraph, i, j, k, m, n, consumed, MaxLineWidth: integer;
  s, line: string;
  Bitmap: TBitmap;
  Canvas: TCanvas;
  Syllables: TStringList;
begin
  Syllables := TStringList.Create;
  try
    // We need a canvas to use its TextWidth method to get the width
    // of the text to see if it fits in the client area or not. The
    // TMemo class doesn't have a Canvas property, so we have to
    // create one of our own.
    Bitmap := TBitmap.Create;
    try
      Canvas := Bitmap.Canvas;
      Canvas.Font := Memo.Font;
      // Maximum width. NOTE(review): the 6-pixel allowance presumably
      // accounts for the memo's inner margins -- confirm.
      MaxLineWidth := Memo.ClientWidth - 6;
      Memo.Lines.BeginUpdate; // Avoid repainting on every added line
      try
        Memo.Lines.Clear;
        for paragraph := 0 to OriginalText.Count - 1 do begin
          // For each paragraph
          s := OriginalText[paragraph]; // Get the original paragraph
          // Get the lines in which we have to break the paragraph. The
          // (s <> '') test keeps the loop from running on an empty rest
          // when MaxLineWidth is zero or negative.
          while (s <> '') and
            (Canvas.TextWidth(s) > MaxLineWidth) do begin
            // First we find (in "j") the index of the start of the
            // first word that doesn't fit (the one to hyphenate)
            j := 1;
            n := Length(s);
            i := 2;
            while i <= n do begin
              if (s[i-1] = ' ') and (s[i] <> ' ') then
                j := i; // last beginning of a word
              if Canvas.TextWidth(Copy(s, 1, i)) > MaxLineWidth then
                break; // reached a width that doesn't fit
              inc(i);
            end;
            // The scan starts at 2, so for a one-character text "i" ends
            // past the end; clamp it before indexing the string
            if i > n then i := n;
            // Where does the break occur?
            if s[i] = ' ' then begin
              // Great! We break on a space
              Memo.Lines.Add(Copy(s, 1, i - 1)); // Add the line
              s := Copy(s, i + 1, n - i); // Remove the line
            end else begin
              // We break somewhere in a word. Now, we find (in "k")
              // the first space after the word
              k := j + 1;
              while (k <= n) and (s[k] <> ' ') do inc(k);
              // Divide the word in Syllables
              Syllabify(Syllables, Copy(s, j, k - j));
              ApplyRules(Syllables);
              // Check (in "m") how many syllables fit and (in
              // "consumed") how many characters of the word they cover.
              // The last syllable always stays for the next line, which
              // also keeps Syllables[m] inside the list.
              m := 0;
              consumed := 0;
              line := Copy(s, 1, j - 1);
              while (m < Syllables.Count - 1) and
                (Canvas.TextWidth(line + Syllables[m] + '-')
                <= MaxLineWidth) do begin
                line := line + Syllables[m];
                inc(consumed, Length(Syllables[m]));
                inc(m);
              end;
              if consumed > 0 then begin
                // Part of the word fits. Add a hyphen unless the line
                // already ends with the word's own hyphen.
                if line[Length(line)] <> '-' then begin
                  line := line + '-';
                  // If the word continues with a hyphen, the one just
                  // added stands in for it: skip it in the text
                  if (j + consumed <= n) and (s[j + consumed] = '-') then
                    inc(consumed);
                end;
              end else if j = 1 then begin
                // Nothing of the word fits and it already starts the
                // line: it is simply too wide. Emit the whole word so
                // the text keeps shrinking.
                if k > n then
                  break; // The word is all that is left: it becomes the
                         // last line of the paragraph below
                line := Copy(s, 1, k - 1);
                consumed := k; // The word plus the space that follows it
              end;
              // (When nothing fits but j > 1, "line" holds the words
              // before this one and the word moves to the next line.)
              Memo.Lines.Add(line); // Add the line
              s := Copy(s, j + consumed, n); // Remove the line
            end;
          end;
          Memo.Lines.Add(s); // Add the last line
        end;
      finally
        Memo.Lines.EndUpdate;
      end;
    finally
      Bitmap.Free;
    end;
  finally
    Syllables.Free;
  end;
end;
To test the procedure, drop a Memo component on a form, align it for example to the top of the form (Align = alTop) and write the following code in the Resize event of the form:
procedure TForm1.FormResize(Sender: TObject);
// Re-hyphenates the sample text into Memo1 every time the form is resized.
const
  // One array element per paragraph of the sample text
  SampleParagraphs: array[0..1] of string = (
    'Si se ha preguntado cómo hacen los '
    + 'programas procesamiento de textos para dividir palabras '
    + 'con de guiones al final de una línea, he aquí un '
    + 'ejemplo sencillo (en comparación con los que usan las '
    + 'aplicaciones de procesamiento de textos).',
    'Este es un segundo párrafo que se provee '
    + 'con fines de ejemplo.');
var
  Paragraphs: TStringList;
  p: Integer;
begin
  Paragraphs := TStringList.Create;
  try
    for p := Low(SampleParagraphs) to High(SampleParagraphs) do
      Paragraphs.Add(SampleParagraphs[p]);
    Hyphenate(Memo1, Paragraphs);
  finally
    Paragraphs.Free;
  end;
end;