Examples Delphi

I often get questions like: "How can I cut out the text I wan't from a HTML-file ?"
One way to do this is to first remove all taginformation, and make a clean textfile of it. How can I do that? Well it's quite simple. A HTML-file consists of tags and text. Everything that's surrounded by a '<' and a '>' - character is what we call tags. Tags is information which the webbrowser will respond to in different ways. On the other hand, things between a '>' and '<' is pure text (with some exceptions), which the browser will write to the screen.
Lets consider two exceptions in our simple converter: ' ' - nonbreaking space and '"' - quotation mark.
Let's convert the break-tag '
' to a carriage return (#13#10).
Finally we delete empty lines and save the file with a txt-extension.
Here's the code:

procedure TForm1.ConvertHTMLToText(FileName: TFileName);
var
Dummy: TStringList;
Str, ResText: string;
Count, n: Integer;
MayBeText: Boolean;
begin
Dummy:= TStringList.Create;
try
Dummy.LoadFromFile(FileName);
Str:= Dummy.Text;
Count:= 0;
MayBeText:= false;
ResText:= '';
for n:= 1 to Length(Str) do
begin
if Count > 0 then
Dec(Count)
else
begin
if (Str[n] = '&') and // &NBSP;
(Uppercase(Str[n + 1]) = 'N') and
(Uppercase(Str[n + 2]) = 'B') and
(Uppercase(Str[n + 3]) = 'S') and
(Uppercase(Str[n + 4]) = 'P') and
(Uppercase(Str[n + 5]) = ';') then
begin
// Skip next five chars
Count:= 5;
ResText:= ResText + ' ';
end
else
if (Str[n] = '&') and // "
(Uppercase(Str[n + 1]) = 'Q') and
(Uppercase(Str[n + 2]) = 'U') and
(Uppercase(Str[n + 3]) = 'O') and
(Uppercase(Str[n + 4]) = 'T') and
(Uppercase(Str[n + 5]) = ';') then
begin
// Skip next five chars
Count:= 5;
ResText:= ResText + '"';
end
else
if MayBeText and (Str[n] <> '<') then // Consider as text
ResText:= ResText + Str[n];
if Str[n] = '<' then
begin
MayBeText:= false;
//

if (Uppercase(Str[n + 1]) = 'B') and (Uppercase(Str[n + 2]) = 'R') and
(Uppercase(Str[n + 3]) = '>') then
ResText:= ResText + #13#10;
end;
if Str[n] = '>' then
MayBeText:= true;
end;
end;
Str:= '';
Str:= ResText;
ResText:= '';
Count:= 0;
// suppress empty lines
for n:= 1 to Length(Str) do
begin
if Count > 0 then
Dec(Count)
else
begin
if (Str[n] = #13) and (Str[n + 1] = #10) and (Str[n + 2] = #13) and
(Str[n + 3] = #10) then
Count:= 1
else
ResText:= ResText + Str[n];
end;
end;
Dummy.Text:= ResText;
Dummy.SaveToFile(Copy(FileName, 1, Pos('.', FileName)) + 'txt');
finally
Dummy.Free;
end;
end;