socket编程阻塞的问题搞了我几个月,这段时间实在是太漫长了,其实我只是需要一个很简单的函数,那就是获取网页源码,就是一个这么简单的需要,我搞了几个月。

最初我把所有的socket函数放入一个线程,在线程中设置超时时间,但这样会遇到一个问题,如果线程的超时时间设为30秒,30秒没接收完成就表达失败,但在正常情况下网页文件在30秒内也有可能接收不完,所以这个方法宣告失效。

接着又找了个Linux下的多线程下载软件,研究了一下代码,写了个多线程的网页文件下载,这又把问题弄得更复杂了,可能是我的代码没控制好,程序虽然不会阻塞了,但下载的网页源码偶尔会不完整,遇到这个问题又拖了我几个月。

几个月后开始用Delphi了,不是说Delphi的第三方控件多么,用了IdHttp、WinHttp、HTTPCli这三个控件。首先接触的是IdHttp,对IdHttp的第一印象就不好,居然Get百度或Google都会出错,虽然有办法解决,但始终感觉不爽,所以弃之、接着使用WinHttp,这个控件没怎么用,我把他做成DLL,然后在VC中调用,好像也出错,用着用着就无法获取网页源码了,不知是什么问题。最后是HttpCli这个控件,这是ICS组件包中的一个控件,虽然比前两个控件好用些,但最近几天又发现了一个问题,那就是有内存泄漏,以前一直没发现。我测试了一下,不停的用HttpCli获取网页源码,程序的内存占用一直在涨,每Get一次涨几K到几十K不等,我操!最后还是不得不用Socket写,TNND,花了几个月的时间绕了这么一大圈。

最后的解决办法是用线程来解决超时的问题,socket的阻塞函数用得比较熟,非阻塞函数不太会用,所以就想了一招,connect、send、 recv这三个函数我都放在线程里,并设置线程的超时时间,其中recv是接收一次就创建一个线程,并等待线程结束,比如每recv一次,就最多等待10秒,如果超过10秒钟,那就直接退出,这样我看你还怎么阻塞。

unit WinHttp;

interface

uses
    WinSock, Sockets, Windows, SysUtils;

const
    HTTP_OK = 1;
    HTTP_TIMEOUT = 2;
    HTTP_FAIL = 3;
    HTTP_STATECODE_ERR = 4;

function GetHtmlSource(Url:string; TimeOut:Cardinal; var HtmlSource:string):Integer;

implementation

var
    WSAData:TWSAData;
    Host, Path:string;
    sockfd:Integer; //套接字
    hostEnt:PHostEnt;
    addr:sockaddr_in;
    SocketHost:TSocketHost;
    SendData:string;
    Buf:array[0..4096] of char;
    SendBuf:array[0..1024] of Char;
    PrevTime:Cardinal;

    RecvSize:LongInt;
    nRecv:Integer;

    IsCon:Boolean;
    RecvData:string;
    StateCode:Integer;
    HttpHeadDone:Boolean;

procedure InitData;
begin
    Host := '';
    Path := '';
    sockfd := 0;
    hostEnt := nil;
    SocketHost := '';
end;
//分隔URL
procedure ParseURL(
    const Url : String;
    var Host, Path : String);
var
    nIndex:Integer;
    S:string;
begin
    S := LowerCase(url);
    if Pos('http://', S) <> 0 then
    begin
        //删除http://
        Delete(S, 1, Length('http://'));
    end;

    nIndex := Pos('/', S);
    if nIndex = 0 then
    begin
        Host := S;
        Path := '/';
    end
    else
    begin
        Host := Copy(S, 1, nIndex - 1);
        Path := Copy(S, nIndex, Length(S));
    end;
end;

function ConnectThread(P:Pointer):LongInt;stdcall;
begin
    if connect(sockfd, addr, SizeOf(addr)) <> 0 then
    begin
        IsCon := False;
    end
    else
    begin
        IsCon := True;
    end;
    EndThread(0);
    Result := 1;
end;

function SendThread(P:Pointer):LongInt;stdcall;
begin
    send(sockfd, SendBuf, StrLen(SendBuf), 0);

    EndThread(0);
    Result := 1;
end;

function RecvThread(P:Pointer):LongInt;stdcall;
begin
    nRecv := recv(sockfd, Buf, 4096, 0);
    
    EndThread(0);
    Result := 1;
end;

function GetHtmlSource(Url:string; TimeOut:Cardinal; var HtmlSource:string):Integer;
var
    ThreadId:Cardinal;
    hHandle:THandle;
    S:string;
    nIndex:Integer;
begin
    //清空单元变量,单元变量不会自动释放
    RecvData := '';
    SendData := '';
    FillChar( Buf, 4096, 0);
    FillChar( SendBuf, 1024, 0);
    SocketHost := '';
    hostEnt := nil;
    sockfd := 0;
    Host := '';
    Path := '';

    if WSAStartup(MakeWord(2,2), WSAData) <> 0 then
    begin
        Result := 1;
        Exit;
    end;

    ParseURL(Url, Host, Path);

    //建立套接字
    sockfd := socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);

    if Host <> '' then
    begin
        if Host[1] in ['0'..'9'] then
        begin
          if inet_addr(PChar(Host)) <> INADDR_NONE then
            SocketHost := Host;
        end
        else
        begin
          hostEnt := gethostbyname(pchar(Host));
          if hostEnt <> nil then
            with hostEnt^ do
            SocketHost := format('%d.%d.%d.%d', [ord(h_addr^[0]), ord(h_addr^[1]),
                  ord(h_addr^[2]), ord(h_addr^[3])]);
        end;
    end
    else SocketHost := '0.0.0.0';

    addr.sin_family := AF_INET;
    addr.sin_port := htons(80);
    addr.sin_addr.S_addr := inet_addr(PChar(SocketHost));

    //connect线程
    hHandle := BeginThread(nil, 0, @ConnectThread, nil, 0, ThreadId);
    if WaitForSingleObject(hHandle, TimeOut * 1000) <> WAIT_OBJECT_0 then
    begin
        Result := HTTP_TIMEOUT;
        closesocket(sockfd);
        WSACleanup;
        Exit;
    end;

    SendData := 'GET ' + Path + ' HTTP/1.1' + #13#10;
    SendData := SendData + 'Host: ' + Host + #13#10;
    SendData := SendData + 'Connection: Close' + #13#10#13#10;

    FillChar(SendBuf, 1024, 0);
    StrCopy(SendBuf, PChar(SendData));

    //send线程
    hHandle := BeginThread(nil, 0, @SendThread, nil, 0, ThreadId);
    if WaitForSingleObject(hHandle, TimeOut * 1000) <> WAIT_OBJECT_0 then
    begin
        Result := HTTP_TIMEOUT;
        closesocket(sockfd);
        WSACleanup;
        Exit;
    end;

    FillChar(Buf, Length(Buf), 0);
    PrevTime := GetTickCount;
    RecvSize := 0;

    HttpHeadDone := False;
    while True do
    begin
        //recv线程
        hHandle := BeginThread(nil, 0, @RecvThread, nil, 0, ThreadId);
        if WaitForSingleObject(hHandle, TimeOut * 1000) <> WAIT_OBJECT_0 then
        begin
            Result := HTTP_TIMEOUT;
            RecvData := '';
            closesocket(sockfd);
            WSACleanup;
            Exit;
        end;

        if nRecv = -1 then
        begin
            HtmlSource := '';
            Result := HTTP_FAIL;
            closesocket(sockfd);
            WSACleanup;
            Exit;
        end
        else if nRecv = 0 then
        begin
            Break;
        end
        else
        begin
            RecvData := RecvData + Buf;
            FillChar(Buf, Length(Buf), 0);

            if not HttpHeadDone then
            begin
                if Pos(#13#10#13#10, RecvData) <> 0 then
                begin
                    S := Copy(RecvData, 1, Pos(#13#10, RecvData) - 1);
                    Delete(S, 1, Length('HTTP/1.1 '));
                    nIndex := Pos(' ', S);
                    Delete(S, nIndex, Length(S) - (nIndex - 1));
                    StateCode := StrToInt(S);
                    if StateCode <> 200 then
                    begin
                        Result := HTTP_STATECODE_ERR;
                        HtmlSource := '';
                        closesocket(sockfd);
                        WSACleanup;
                        Exit;
                    end;

                    //删除HTTP头
                    Delete(RecvData, 1, Pos(#13#10#13#10, RecvData) + Length(#13#10#13#10) - 1);
                    HttpHeadDone := True;
                end;
            end;
        end;
    end;

    HtmlSource := RecvData;

    closesocket(sockfd);
    WSACleanup;
    Result := HTTP_OK;
end;

initialization

finalization

end.