Originally Posted by warfley
hier ist mal ein kleines Beispiel:
Code:
procedure TForm1.Button1Click(Sender: TObject);
procedure CheckCSS(CSS: string; lst: TStringList);
begin
//Parse CSS here
end;
procedure CheckForLocalFiles(tag: PHtmlTag; lst: TStringList);
var
i: integer;
begin
if Assigned(lst) then
begin
if tag^.TagType = 'style' then
CheckCSS(tag^.Content, lst)
else if tag^.TagType = 'link' then
begin
for i := 0 to tag^.Propertys.Count - 1 do
begin
if PTagProperty(tag^.Propertys[i])^.Name = 'href' then
if AnsiEndsText('.css', PTagProperty(tag^.Propertys[i])^.Value) then
begin
lst.Add(PTagProperty(tag^.Propertys[i])^.Value);
end;
end;
end
else
begin
for i := 0 to tag^.Propertys.Count - 1 do
begin
if PTagProperty(tag^.Propertys[i])^.Name = 'style' then
CheckCSS(PTagProperty(tag^.Propertys[i])^.Value, lst)
else if PTagProperty(tag^.Propertys[i])^.Name = 'src' then
lst.Add(PTagProperty(tag^.Propertys[i])^.Value);
end;
end;
for i := 0 to tag^.SubTags.Count - 1 do
CheckForLocalFiles(PHtmlTag(tag^.SubTags[i]), lst);
end;
end;
var
DL_Links: TStringList;
Htm: THtmlFile;
htmText: string;
i: integer;
begin
DL_Links := TStringList.Create;
try
htmText := IdHTTP1.Get('Link');
Htm := THtmlFile.Create;
try
Htm.LoadHtmlFromString(htmText);
CheckForLocalFiles(Htm.Tags, DL_Links);
// Hier Dateien runterladen und ggf seperat parsen (css dateien)
for i:=0 to DL_Links.Count-1 do
begin
htmText:=StringReplace(htmText, DL_Links[i], 'Neuer Local File', [rfIgnoreCase, rfReplaceAll]);
end;
finally
Htm.Free;
end;
finally
DL_Links.Free;
end;
end;
mit einem kleinen Html Parser den ich mal geschrieben hatte:
Code:
unit HtmlParse;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils;
type
PHtmlTag = ^THtmlTag;
THtmlTag = record
Parent: PHtmlTag;
IID: integer;
TagType: string;
Content: string;
Propertys: TList;
SubTags: TList;
end;
PTagProperty = ^TTagProperty;
TTagProperty = record
Name: string;
Value: string;
end;
THtmlFile = class(TObject)
private
FTags: PHtmlTag;
function GetTag(str: string; var pos: integer): PHtmlTag;
public
property Tags: PHtmlTag read FTags;
destructor Destroy; override;
constructor Create;
procedure LoadHtmlFile(FileStream: TStream); overload;
procedure LoadHtmlFile(FileName: string); overload;
procedure LoadHtmlFromString(FileCont: string);
function GetAllTags(TagName: string; SelTags: TList): integer;
function GetTagById(TagID: string): PHtmlTag;
function GetTagByIId(TagID: string): PHtmlTag;
function GetTypeRange(RangeList: TStrings): integer;
function GetTagProperty(Tag: PHtmlTag; PropertyName: string): PTagProperty;
end;
TValType = (vtRadio, vtPass, vtCheck, vtEdit, vtHidden, vtSelect);
PInputField = ^TInputField;
TInputField = record
Name: string;
Values: TStringList;
SelVal: string;
ValType: TValType;
end;
THtmlForm = class(TObject)
private
Url: string;
FIsPost: boolean;
FInputs: TList;
FormPointer: PHtmlTag;
Blacklst: TStringList;
procedure GetAllRadios(RName: string; lst: TStrings);
procedure GetAllOptions(SelTag: PHtmlTag; lst: TStrings);
procedure DisposeCurrForm;
procedure LoadForm(Form: PHtmlTag);
{ private declarations }
public
property IsPost: boolean read FIsPost;
property Inputs: TList read FInputs;
constructor Create(FormTag: PHtmlTag);
destructor Destroy; override;
function GenerateRequest(var aUrl: string): string;
end;
function StringsContain(lst: TStrings; SearchPhrase: string;
caseSensetive: boolean = True): boolean;
function GetTagPropertyVal(P: string; PTag: PHtmlTag): string;
implementation
function StringsContain(lst: TStrings; SearchPhrase: string;
caseSensetive: boolean = True): boolean;
var
i: integer;
begin
Result := False;
for i := 0 to lst.Count - 1 do
if caseSensetive then
begin
if lst[i] = SearchPhrase then
begin
Result := True;
break;
end;
end
else
if LowerCase(lst[i]) = LowerCase(SearchPhrase) then
begin
Result := True;
break;
end;
end;
function GetTagPropertyVal(P: string; PTag: PHtmlTag): string;
var
i: integer;
begin
Result := 'Not Available';
for i := 0 to PTag^.Propertys.Count - 1 do
if LowerCase(PTagProperty(PTag^.Propertys[i])^.Name) = LowerCase(P) then
begin
Result := PTagProperty(PTag^.Propertys[i])^.Value;
end;
end;
constructor THtmlForm.Create(FormTag: PHtmlTag);
begin
inherited Create;
LoadForm(FormTag);
end;
destructor THtmlForm.Destroy;
begin
DisposeCurrForm;
inherited Destroy;
end;
procedure THtmlForm.GetAllRadios(RName: string; lst: TStrings);
procedure SearchForRadio(RDName: string; lst: TStrings; start: PHtmlTag);
var
i: integer;
tmps: string;
begin
if LowerCase(start^.TagType) = 'input' then
if LowerCase(GetTagPropertyVal('type', start)) = 'radio' then
if GetTagPropertyVal('name', start) = RDName then
begin
tmps := GetTagPropertyVal('value', start);
if tmps = 'Not Available' then
tmps := start^.Content;
lst.Add(tmps);
end;
for i := 0 to start^.SubTags.Count - 1 do
SearchForRadio(RDName, lst, start^.SubTags[i]);
end;
begin
SearchForRadio(RName, lst, FormPointer);
end;
procedure THtmlForm.GetAllOptions(SelTag: PHtmlTag; lst: TStrings);
var
i: integer;
tmps: string;
begin
if LowerCase(SelTag^.TagType) = 'option' then
begin
tmps := GetTagPropertyVal('value', SelTag);
if tmps = 'Not Available' then
tmps := SelTag^.Content;
lst.Add(tmps);
end;
for i := 0 to SelTag^.SubTags.Count - 1 do
GetAllOptions(SelTag^.SubTags[i], lst);
end;
procedure THtmlForm.DisposeCurrForm;
var
i: integer;
begin
FormPointer := nil;
Url := '';
Blacklst.Free;
if Assigned(Inputs) then
begin
for i := 0 to Inputs.Count - 1 do
begin
PInputField(Inputs[i])^.Values.Free;
Dispose(PInputField(Inputs[i]));
end;
Inputs.Free;
end;
end;
procedure THtmlForm.LoadForm(Form: PHtmlTag);
procedure LoadInputs(Tag: PHtmlTag);
var
i: integer;
tmpInp: PInputField;
tmps: string;
begin
tmpInp := nil;
if LowerCase(Tag^.TagType) = 'input' then
begin
tmps := GetTagPropertyVal('type', Tag);
tmps := LowerCase(tmps);
if tmps = 'checkbox' then
begin
new(tmpInp);
tmpInp^.Name := GetTagPropertyVal('name', Tag);
tmpInp^.Values := TStringList.Create;
tmps := GetTagPropertyVal('value', Tag);
if tmps = 'Not Available' then
tmps := Tag^.Content;
tmpInp^.Values.Add(tmps);
tmpInp^.SelVal := tmps;
tmpInp^.ValType := vtCheck;
end
else if tmps = 'hidden' then
begin
new(tmpInp);
tmpInp^.Name := GetTagPropertyVal('name', Tag);
tmpInp^.Values := TStringList.Create;
tmps := GetTagPropertyVal('value', Tag);
if tmps = 'Not Available' then
tmps := Tag^.Content;
tmpInp^.Values.Add(tmps);
tmpInp^.SelVal := tmps;
tmpInp^.ValType := vtHidden;
end
else if tmps = 'password' then
begin
new(tmpInp);
tmpInp^.Name := GetTagPropertyVal('name', Tag);
tmpInp^.Values := TStringList.Create;
tmps := GetTagPropertyVal('value', Tag);
if tmps = 'Not Available' then
tmps := Tag^.Content;
tmpInp^.Values.Add(tmps);
tmpInp^.SelVal := tmps;
tmpInp^.ValType := vtPass;
end
else if tmps = 'text' then
begin
new(tmpInp);
tmpInp^.Name := GetTagPropertyVal('name', Tag);
tmpInp^.Values := TStringList.Create;
tmps := GetTagPropertyVal('value', Tag);
if tmps = 'Not Available' then
tmps := Tag^.Content;
tmpInp^.Values.Add(tmps);
tmpInp^.SelVal := tmps;
tmpInp^.ValType := vtEdit;
end
else if tmps = 'radio' then
begin
if not StringsContain(Blacklst, GetTagPropertyVal('name', Tag)) then
begin
new(tmpInp);
tmpInp^.Name := GetTagPropertyVal('name', Tag);
tmpInp^.Values := TStringList.Create;
GetAllRadios(tmpInp^.Name, tmpInp^.Values);
Blacklst.Add(tmpInp^.Name);
tmpInp^.SelVal := '';
tmpInp^.ValType := vtRadio;
end;
end;
end
else if LowerCase(Tag^.TagType) = 'select' then
begin
new(tmpInp);
tmpInp^.Name := GetTagPropertyVal('name', Tag);
tmpInp^.Values := TStringList.Create;
GetAllOptions(Tag, tmpInp^.Values);
tmpInp^.SelVal := tmpInp^.Values[0];
tmpInp^.ValType := vtSelect;
end
else if LowerCase(Tag^.TagType) = 'textarea' then
begin
new(tmpInp);
tmpInp^.Name := GetTagPropertyVal('name', Tag);
tmpInp^.Values := TStringList.Create;
tmps := GetTagPropertyVal('value', Tag);
if tmps = 'Not Available' then
tmps := Tag^.Content;
tmpInp^.Values.Add(tmps);
tmpInp^.SelVal := tmps;
tmpInp^.ValType := vtEdit;
end;
if Assigned(tmpInp) then
begin
if tmpInp^.Name = 'Not Available' then
begin
tmpInp^.Values.Free;
Dispose(tmpInp);
end
else
Inputs.Add(tmpInp);
end;
for i := 0 to Tag^.SubTags.Count - 1 do
LoadInputs(Tag^.SubTags[i]);
end;
var
x: integer;
begin
DisposeCurrForm;
FInputs := TList.Create;
Blacklst := TStringList.Create;
FormPointer := Form;
LoadInputs(Form);
Blacklst.Free;
Url := GetTagPropertyVal('action', FormPointer);
FIsPost := LowerCase(GetTagPropertyVal('method', FormPointer)) = 'post';
end;
function THtmlForm.GenerateRequest(var aUrl: string): string;
var
aPost: string;
i: integer;
tmpI: PInputField;
begin
Result := '';
aUrl := Url;
aPost := '';
for i := 0 to Inputs.Count - 1 do
begin
tmpI := Inputs[i];
if tmpI^.SelVal <> '' then
begin
aPost := aPost + '&' + tmpI^.Name + '=' + tmpI^.SelVal;
end;
end;
aPost := Copy(aPost, 2, Length(aPost) - 1);
if IsPost then
Result := aPost
else
aUrl := aUrl + '?' + aPost;
end;
procedure THtmlFile.LoadHtmlFromString(FileCont: string);
var
i: integer;
begin
i := 0;
repeat
Inc(i);
until FileCont[i] = '<';
FTags := GetTag(FileCont, i);
end;
function THtmlFile.GetTag(str: string; var pos: integer): PHtmlTag;
var
instring: boolean;
tmpProp: PTagProperty;
donewithType: boolean;
lastaspace: boolean;
setpropval: boolean;
closed: boolean;
tmps: char;
Cont: boolean;
tmpnewp: PHtmlTag;
comment: boolean;
begin
donewithType := False;
comment := False;
lastaspace := False;
setpropval := False;
instring := False;
closed := False;
Cont := False;
new(Result);
Result^.Propertys := TList.Create;
Result^.SubTags := TList.Create;
Result^.IID := pos;
repeat
Inc(pos);
tmps := str[pos];
if not comment then
begin
if not closed then
begin
if (tmps = ' ') and (not instring) and (not Cont) then
begin
if not lastaspace then
begin
lastaspace := True;
if not donewithType then
begin
donewithType := True;
end;
if setpropval then
begin
setpropval := False;
Result^.Propertys.Add(tmpProp);
end;
end;
end
else if (tmps = '"') and (not Cont) then
begin
instring := not instring;
lastaspace := False;
end
else if (tmps = '=') and (not instring) and (not Cont) then
begin
setpropval := True;
lastaspace := False;
end
else if (tmps = '/') and (not instring) and (not Cont) then
begin
closed := True;
end
else if (tmps = '>') and (not instring) and (not Cont) then
begin
if (lowercase(Result^.TagType) = 'br') or
(lowercase(Result^.TagType) = 'meta') or
(lowercase(Result^.TagType) = 'input') or
(lowercase(Result^.TagType) = 'link') then
begin
break;
end;
if setpropval then
begin
Result^.Propertys.Add(tmpProp);
end;
cont := True;
lastaspace := False;
end
else if (tmps = '<') and (cont) then
begin
if str[pos + 1] = '/' then
closed := True
else
begin
if (str[pos + 1] = '!') and (str[pos + 2] = '-') and
(str[pos + 3] = '-') then
comment := True
else
begin
tmpnewp := GetTag(str, pos);
tmpnewp^.Parent := Result;
Result^.SubTags.Add(tmpnewp);
end;
end;
end
else
begin
if Cont then
Result^.Content := Result^.Content + tmps
else if (not donewithType) then
Result^.TagType := Result^.TagType + tmps
else if setpropval then
tmpProp^.Value := tmpProp^.Value + tmps
else if lastaspace then
begin
new(tmpProp);
tmpProp^.Name := tmpProp^.Name + tmps;
end
else
tmpProp^.Name := tmpProp^.Name + tmps;
end;
if tmps <> ' ' then
lastaspace := False;
end
else
if tmps = '>' then
begin
break;
end;
end
else
begin
if (tmps = '-') and (str[pos + 1] = '-') and (str[pos + 2] = '>') then
comment := False;
end;
until pos >= Length(str);
end;
destructor THtmlFile.Destroy;
procedure DeallocPointer(P: PHtmlTag);
var
i: integer;
begin
if Assigned(P) then
begin
for i := 0 to P^.SubTags.Count - 1 do
DeallocPointer(P^.SubTags[i]);
for i := 0 to P^.Propertys.Count - 1 do
Dispose(PTagProperty(P^.Propertys[i]));
P^.SubTags.Free;
P^.Propertys.Free;
Dispose(P);
end;
end;
begin
if Assigned(FTags) then
DeallocPointer(FTags);
inherited Destroy;
end;
constructor THtmlFile.Create;
begin
inherited Create;
end;
procedure THtmlFile.LoadHtmlFile(FileStream: TStream);
overload;
var
StrLen, i: integer;
TempStr: string;
begin
TempStr := '';
FileStream.ReadBuffer(StrLen, SizeOf(integer));
if StrLen > -1 then
begin
SetLength(TempStr, StrLen);
FileStream.ReadBuffer(Pointer(TempStr)^, StrLen * SizeOf(char));
i := 0;
repeat
Inc(i);
until TempStr[i] = '<';
FTags := GetTag(TempStr, i);
end;
end;
procedure THtmlFile.LoadHtmlFile(FileName: string); overload;
var
tmplst: TStringList;
i: integer;
begin
try
tmplst := TStringList.Create;
tmplst.LoadFromFile(FileName);
i := 0;
repeat
Inc(i);
until tmplst.Text[i] = '<';
FTags := GetTag(tmplst.Text, i);
finally
tmplst.Free;
end;
end;
function THtmlFile.GetAllTags(TagName: string; SelTags: TList): integer;
procedure GetTagsByName(TName: string; ParentTag: PHtmlTag; STags: TList;
var Number: integer);
var
i: integer;
begin
if LowerCase(ParentTag^.TagType) = LowerCase(TName) then
begin
STags.Add(ParentTag);
Inc(Number);
end;
for i := 0 to ParentTag^.SubTags.Count - 1 do
GetTagsByName(TName, ParentTag^.SubTags[i], STags, Number);
end;
begin
Result := 0;
if Assigned(SelTags) then
GetTagsByName(TagName, FTags, SelTags, Result);
end;
function THtmlFile.GetTagById(TagID: string): PHtmlTag;
function SearchTag(TID: string; ParentTag: PHtmlTag): PHtmlTag;
var
i: integer;
begin
Result := nil;
if Assigned(ParentTag) then
begin
for i := 0 to ParentTag^.Propertys.Count - 1 do
if (LowerCase(PTagProperty(ParentTag^.Propertys[i])^.Name) = 'id') and
(PTagProperty(ParentTag^.Propertys[i])^.Value = TID) then
begin
Result := ParentTag;
break;
end;
for i := 0 to ParentTag^.SubTags.Count do
if not Assigned(Result) then
Result := SearchTag(TID, ParentTag^.SubTags[i]);
end;
end;
begin
Result := SearchTag(TagID, FTags);
end;
function THtmlFile.GetTagByIId(TagID: string): PHtmlTag;
function SearchTag(TID: string; ParentTag: PHtmlTag): PHtmlTag;
var
i: integer;
begin
Result := nil;
if Assigned(ParentTag) then
begin
if ParentTag^.IID = StrToInt(TagID) then
Result := ParentTag
else
for i := 0 to ParentTag^.SubTags.Count do
if not Assigned(Result) then
Result := SearchTag(TID, ParentTag^.SubTags[i]);
end;
end;
begin
Result := SearchTag(TagID, FTags);
end;
function THtmlFile.GetTagProperty(Tag: PHtmlTag; PropertyName: string): PTagProperty;
var
i: integer;
begin
Result := nil;
if Assigned(Tag) then
for i := 0 to Tag^.Propertys.Count - 1 do
if TTagProperty(Tag^.Propertys[i]^).Name = PropertyName then
begin
Result := PTagProperty(Tag^.Propertys[i]);
break;
end;
end;
function THtmlFile.GetTypeRange(RangeList: TStrings): integer;
procedure GetRange(RLst: TStrings; StartP: PHtmlTag);
var
i: integer;
begin
if not StringsContain(RLst, UpperCase(StartP^.TagType)) then
RLst.Add(UpperCase(StartP^.TagType));
for i := 0 to StartP^.SubTags.Count - 1 do
GetRange(RLst, StartP^.SubTags[i]);
end;
begin
GetRange(RangeList, FTags);
Result := RangeList.Count;
end;
end.
Ist aber Free Pascal, kann sein dass du für Delphi ein Paar Änderungen vornehmen musst
|