用MASM32编程读取网站首页代码
记得在某个汇编论坛上有网友讨论如何获取一个网站的首页文件名,是index.htm,index.html,index.asp,还是……
于是动手写了这个程序读取网站的返回信息来做测试,发现有些网站的返回信息中的Content-Location值包含了首页文件名,如www.hcny.gov.cn:
/---
HTTP/1.1 200 OK
Content-Length: 34432
Content-Type: text/html
Content-Location: http://www.hcny.gov.cn/index.htm
Last-Modified: Wed, 05 Dec 2007 02:59:18 GMT
Accept-Ranges: bytes
ETag: "244d2d3ea36c81:80a"
Server: Microsoft-IIS/6.0
Date: Wed, 05 Dec 2007 15:05:19 GMT
---/
有些网站则没有,如www.163.com:
/---
HTTP/1.0 200 OK
Date: Wed, 05 Dec 2007 15:01:13 GMT
Server: Apache/2.0.59 (Unix)
Accept-Ranges: bytes
Vary: Accept-Encoding
Content-Length: 127476
Content-Type: text/html; charset=GB2312
Age: 265
X-Cache: HIT from www.163.com
Connection: keep-alive
---/
另外 IE 从 SP2 开始不支持 view-source 了,为了使用 view-source 而装一个 FireFox 似乎有点麻烦,于是加了读取网页代码的功能。目前是读取网站的首页的代码。
其中的一些代码参考了MASM32官方论坛的帖子:
http://www.masm32.com/board/index.php?topic=8197.0
增加了SearchStr()来搜索网页代码结束位置后,程序运行时间要更久一些,改进的方法是参考网站的返回信息中的Content-Length的值,在最后一次从Sock读取数据时进行搜索……
;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
; FileName: WebHome.asm
; Author: Purple Endurer
; Function: Read a web homepage content
; DevEnv: Win XP SP2 + MASM32 v8
; log
; ------------------------------
; 2007-12-05 Added SearchStr() to search the webpage code end mark string
; 2007-12-04 Can read web homepage content
; 2007-12-03 Created!
;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
; Assembler setup: 386 instruction set, flat memory model, stdcall convention.
        .386
        .model  flat, stdcall
        option  casemap:none            ; identifiers are case-sensitive
include /masm32/
include/windows.inc
include /masm32/
include/kernel32.inc
includelib /masm32/
lib/kernel32.lib
include /masm32/
include/user32.inc
includelib /masm32/
lib/user32.lib
include /masm32/
include/shell32.inc
includelib /masm32/
lib/shell32.lib
include /masm32/
include/wsock32.inc
includelib /masm32/
lib/wsock32.lib
;--- Prototypes for the procedures defined in this file (needed by INVOKE)
WinMain         PROTO :DWORD, :DWORD, :DWORD, :DWORD
btnShow_Click   PROTO
fInitInternet   PROTO :DWORD
fConnect        PROTO lpszHostName:DWORD, nPortNumber:DWORD
SendHttpHead    PROTO :LPSTR, :LPSTR
ReadSockData    PROTO :LPSTR, :DWORD
ResizeConctrol  PROTO :DWORD
SearchStr       PROTO :LPSTR, :LPSTR
;--- Helper macros; every one of them sends messages to the g_hEditVerInfo window.

; m_GotoEnd: sends EM_SETSEL(0, -1) and then EM_SETSEL(-1, 0).
m_GotoEnd MACRO
        invoke  SendMessage, g_hEditVerInfo, EM_SETSEL, 0, -1
        invoke  SendMessage, g_hEditVerInfo, EM_SETSEL, -1, 0
ENDM

; m_InsStr: sends EM_REPLACESEL with the text operand lpszStr
;           (wParam FALSE); lpszStr may be "addr xxx", a register or a variable.
m_InsStr MACRO lpszStr:REQ
        invoke  SendMessage, g_hEditVerInfo, EM_REPLACESEL, FALSE, lpszStr
ENDM

; m_GoNextLine: inserts the CR/LF string g_szCR.
m_GoNextLine MACRO
        m_InsStr addr g_szCR
ENDM

; m_InsCrStr: inserts a line break first and then the text lpszStr.
m_InsCrStr MACRO lpszStr:REQ
        m_GoNextLine
        m_InsStr lpszStr
ENDM
;--- Private window message id and child-control ids
WM_FSOCKET      equ WM_USER + 0fh
IDC_BtnRead     equ 103
IDC_EdtURL      equ 105
IDC_EdtHTML     equ 107

;--- Size in bytes of the shared buffer g_buf
c_BufLen        equ 5000h

;--- Conditional-assembly switches
c_Resize        equ 1                   ; 1 = assemble the WM_SIZE handling (ResizeConctrol)
d_TestData      equ 1                   ; 1 = pre-fill the URL edit box with g_szTestURL

;--- Initial geometry of the URL edit box
c_EditURLLeft   equ 2
c_EditURLTop    equ 5
c_EditURLWidth  equ 500
c_EditURLHeight equ 20

;--- Initial geometry of the read button (placed right of the URL edit box)
c_BtnReadTop    equ c_EditURLTop
c_BtnReadLeft   equ (c_EditURLLeft+c_EditURLWidth+10)
c_BtnReadWidth  equ 70
c_BtnReadHeight equ c_EditURLHeight

;--- Initial geometry of the output rich edit control
c_EdtHTMLLeft   equ 2
c_EdtHTMLTop    equ 30
c_EdtHTMLWidth  equ c_BtnReadLeft+c_BtnReadWidth+2
c_EdtHTMLHeight equ 300

;--- Initial size of the main window, derived from the control layout
c_WinWidth      equ c_EdtHTMLLeft+c_EdtHTMLWidth+10
c_WinHeight     equ c_EdtHTMLTop+c_EdtHTMLHeight+30

;--- Window style bits used when the rich edit control is created
c_RichEditStyle equ WS_CHILD or WS_VISIBLE or ES_MULTILINE or WS_VSCROLL or \
                    ES_AUTOVSCROLL or WS_HSCROLL or ES_NOHIDESEL or ES_SAVESEL or \
                    ES_SELECTIONBAR

MAX_STRING_LEN  equ 8192                ; 2000h
SYSINFO_RET_OK  equ 1
SYSINFO_RET_FAIL equ 2
;sssssssssss
.data
;sssssssssss
; g_szClsName is only a label: the window class name and the window caption
; share the same bytes ("HttpDemo").
g_szClsName     label byte
g_szAppName     db "HttpDemo", 0

if d_TestData eq 1
g_szTestURL     db "www.hcny.gov.cn", 0
endif ; d_TestData

;--- Error / prompt messages
g_szFailIni     db "Fail initialize internet connection!", 0
g_szFailGetHostName db "Fail to get host name!", 0
g_szConnect     db "Fail to connect!", 0
g_szFailWSAStartup db "Fail to WSAStartup", 0
g_szEnterURL    db "请先输入URL!", 0

;--- Window class names
g_szEditCls     db "EDIT", 0
g_szBtnCls      db "button", 0

; g_szNoRichEdit is deliberately NOT terminated: the message continues into
; g_szRichEditDLL, so the two db lines below must stay adjacent.
g_szNoRichEdit  db "无法载入"
g_szRichEditDLL db "RICHED20.DLL", 0
g_szRichEditClass db "RichEdit20A", 0
g_szBtnReadText db "&R 读取", 0

; HTTP request template for wsprintf: first %s = path after "/", second %s = host.
; - The template carries its own terminating 0 and ends with exactly one empty
;   line; it no longer runs on into g_szCR, which appended a stray CR/LF to
;   every request.
; - "Connection: close" asks the server to close the connection after the
;   response, so a blocking recv loop sees end-of-stream instead of waiting
;   for the server's keep-alive timeout.
g_szFmt1        db "GET /%s HTTP/1.1", 0dh, 0ah
                db "Host:%s", 0dh, 0ah
                db "Accept: */*", 0dh, 0ah
                db "User-Agent: Mozilla/4.0 "
                db "(compatible; MSIE 6.00; Windows 2000)", 0dh, 0ah
                db "Connection: close", 0dh, 0ah
                db 0dh, 0ah, 0
g_szCR          db 0dh, 0ah, 0

; Text searched for in the received data (see the caller of SearchStr).
g_szHTTP400     db "HTTP/1.1 400 Bad Request", 0
g_szFmt2        db "WSACleanup failed with error %d", 0
;sssssssssss
.data?
;sssssssssss
;--- Module and window handles
g_hInstance     HINSTANCE ?
g_hWndMain      HANDLE ?
g_hEditURL      HANDLE ?
g_hBtnRead      HANDLE ?
g_hEditVerInfo  HANDLE ?
g_hRichEditDLL  HANDLE ?

;--- Winsock state
wsadata         WSADATA <>
sin             sockaddr_in <>
sock            DWORD ?
error_code      DWORD ?

;--- Text buffers
g_szURL         BYTE 256 dup (?)        ; text read from the URL edit box
g_buf           BYTE c_BufLen dup (?)   ; shared request / response buffer
;sssssssssss
.code
;sssssssssss
; Program entry point.
; Loads RICHED20.DLL (required for the rich edit window class), runs WinMain
; and terminates the process with WinMain's return value. If the DLL cannot be
; loaded, an error box is shown and the process exits with code 1.
start:
        invoke  GetModuleHandle, NULL
        mov     g_hInstance, eax

        invoke  LoadLibrary, OFFSET g_szRichEditDLL
        .if eax != 0
            mov     g_hRichEditDLL, eax
            invoke  WinMain, g_hInstance, NULL, NULL, SW_SHOWDEFAULT
            push    eax                 ; FreeLibrary overwrites eax: keep the exit code
            invoke  FreeLibrary, g_hRichEditDLL
            pop     eax
        .else
            invoke  MessageBox, 0, OFFSET g_szNoRichEdit, \
                    OFFSET g_szAppName, MB_OK or MB_ICONERROR
            mov     eax, 1              ; explicit failure exit code
        .endif
        invoke  ExitProcess, eax
;-----------------------------------------------------------------------
; WinMain - registers the window class, creates the main window and runs
;           the message loop.
; In:    hInst     = instance handle passed to CreateWindowEx
;        hPrevInst, CmdLine, CmdShow = not used by this procedure
; Out:   eax = wParam of the WM_QUIT message, or 0 when the class could not
;              be registered or the window could not be created
;-----------------------------------------------------------------------
WinMain proc hInst:DWORD, hPrevInst:DWORD, CmdLine:DWORD, CmdShow:DWORD
        LOCAL   wc:WNDCLASSEX
        LOCAL   msg:MSG
        LOCAL   hwnd:HWND

        ;--- Describe and register the window class
        mov     wc.cbSize, SIZEOF WNDCLASSEX
        mov     wc.style, CS_HREDRAW or CS_VREDRAW
        mov     wc.lpfnWndProc, OFFSET WndProc
        mov     wc.cbClsExtra, NULL
        mov     wc.cbWndExtra, NULL
        mov     eax, g_hInstance
        mov     wc.hInstance, eax
        ; A system colour used as a class brush must be given as index + 1.
        mov     wc.hbrBackground, COLOR_APPWORKSPACE + 1
        mov     wc.lpszMenuName, NULL
        mov     wc.lpszClassName, OFFSET g_szClsName
        invoke  LoadIcon, NULL, IDI_APPLICATION
        mov     wc.hIcon, eax
        mov     wc.hIconSm, eax
        invoke  LoadCursor, NULL, IDC_ARROW
        mov     wc.hCursor, eax
        invoke  RegisterClassEx, addr wc
        test    eax, eax
        jz      @WinMainFail            ; class not registered: nothing to run

        ;--- Create the main window
        invoke  CreateWindowEx, NULL, ADDR g_szClsName, ADDR g_szAppName, \
                WS_OVERLAPPEDWINDOW + WS_VISIBLE, CW_USEDEFAULT, CW_USEDEFAULT, \
                c_WinWidth, c_WinHeight, NULL, NULL, hInst, NULL
        test    eax, eax
        jz      @WinMainFail            ; without a window no WM_QUIT would ever arrive
        mov     hwnd, eax

        ;--- Message loop
        .while TRUE
            invoke  GetMessage, ADDR msg, NULL, 0, 0
            .break .if (!eax)

            ;--- Alt+R is handled here: it is turned into a click of the read button
            .if msg.message == WM_SYSKEYUP
                .if msg.wParam == VK_R
                    invoke  PostMessage, hwnd, WM_COMMAND, IDC_BtnRead, BM_CLICK
                .endif
            .endif

            invoke  TranslateMessage, ADDR msg
            invoke  DispatchMessage, ADDR msg
        .endw

        mov     eax, msg.wParam
        ret

@WinMainFail:
        xor     eax, eax
        ret
WinMain endp
;-----------------------------------------------------------------------
; WndProc - window procedure of the main window.
; WM_CREATE  : creates the URL edit box, the read button and the rich edit
;              output control.
; WM_COMMAND : a click on the read button fetches the page for the URL in
;              the edit box (btnShow_Click), or asks for a URL if it is empty.
; WM_SIZE    : (only when c_Resize = 1) re-lays out the controls, then falls
;              through to DefWindowProc.
; WM_DESTROY : posts WM_QUIT.
; Out:   eax = 0 for WM_CREATE / WM_COMMAND / WM_DESTROY, otherwise the
;              result of DefWindowProc.
;-----------------------------------------------------------------------
WndProc proc hWnd:HWND, uMsg:UINT, wParam:WPARAM, lParam:LPARAM
        LOCAL   rect:RECT
        LOCAL   hdc:DWORD

        .if uMsg == WM_CREATE
            mov     eax, hWnd
            mov     g_hWndMain, eax

            ;--- URL edit box
            invoke  CreateWindowEx, NULL, offset g_szEditCls, NULL, \
                    WS_CHILD+WS_VISIBLE+ES_AUTOHSCROLL+WS_BORDER, \
                    c_EditURLLeft, c_EditURLTop, c_EditURLWidth, c_EditURLHeight, \
                    hWnd, IDC_EdtURL, g_hInstance, NULL
            mov     g_hEditURL, eax

            ;--- Read button
            invoke  CreateWindowEx, NULL, offset g_szBtnCls, offset g_szBtnReadText, \
                    WS_CHILD+WS_VISIBLE, c_BtnReadLeft, c_BtnReadTop, \
                    c_BtnReadWidth, c_BtnReadHeight, \
                    g_hWndMain, IDC_BtnRead, g_hInstance, NULL
            mov     g_hBtnRead, eax

            ;--- Output control (rich edit)
            invoke  CreateWindowEx, WS_EX_CLIENTEDGE, OFFSET g_szRichEditClass, \
                    NULL, c_RichEditStyle, c_EdtHTMLLeft, c_EdtHTMLTop, \
                    c_EdtHTMLWidth, c_EdtHTMLHeight, \
                    hWnd, IDC_EdtHTML, g_hInstance, 0
            mov     g_hEditVerInfo, eax

            ;--- Raise the text limit (the default is 64K)
            invoke  PostMessage, g_hEditVerInfo, EM_LIMITTEXT, -1, 0

if d_TestData eq 1
            invoke  SetWindowText, g_hEditURL, OFFSET g_szTestURL
endif

        .elseif uMsg == WM_COMMAND
            .if lParam != 0
                mov     eax, wParam     ; ax = control id
                mov     ecx, eax
                shr     ecx, 16         ; cx = notification code
                .if (cx == BN_CLICKED) && (ax == IDC_BtnRead)
                    invoke  GetWindowText, g_hEditURL, addr g_szURL, sizeof g_szURL
                    .if eax == 0
                        ; nothing typed into the URL box
                        invoke  MessageBox, g_hWndMain, addr g_szEnterURL, \
                                addr g_szAppName, MB_ICONQUESTION
                    .else
                        invoke  btnShow_Click
                    .endif
                .endif
            .endif

        .elseif uMsg == WM_DESTROY
            invoke  PostQuitMessage, NULL

if c_Resize eq 1
        .elseif uMsg == WM_SIZE
            ; re-layout first, then let the default handler see the message too
            invoke  ResizeConctrol, lParam
            invoke  DefWindowProc, hWnd, uMsg, wParam, lParam
            ret
endif

        .else
            invoke  DefWindowProc, hWnd, uMsg, wParam, lParam
            ret
        .endif

        xor     eax, eax
        ret
WndProc endp
;-----------------------------------------------------------------------
; btnShow_Click - fetches the home page of the host named in g_szURL and
;                 appends the response to the output control.
; Steps: fInitInternet -> fConnect (port 80) -> SendHttpHead -> read and
;        show the response header -> read and show the body in
;        c_BufLen-sized pieces -> closesocket + WSACleanup.
; In:    g_szURL = host name (filled by the caller)
; Out:   nothing meaningful in eax
; Notes: esi/edi are preserved because this procedure runs inside the window
;        procedure and the helpers it calls may modify them.
;        The socket and the Winsock reference are now released when the
;        connection attempt fails as well.
;-----------------------------------------------------------------------
btnShow_Click proc uses esi edi
        LOCAL   lpBody:DWORD            ; start of body bytes received with the header, or 0
        LOCAL   nRead:DWORD             ; result of the last ReadSockData call

        ;--- Step 1: start Winsock and create the socket
        invoke  fInitInternet, g_hWndMain
        test    eax, eax
        jz      @btnShow_ClickConnect
        invoke  MessageBox, g_hWndMain, eax, addr g_szAppName, 0
        jmp     @btnShow_ClickRet

@btnShow_ClickConnect:
        ;--- Step 2: connect the socket to the HTTP port of the host
        invoke  fConnect, addr g_szURL, 80
        test    eax, eax
        jz      @btnShow_ClickRequest
        invoke  MessageBox, g_hWndMain, eax, addr g_szAppName, 0
        jmp     @btnShow_ClickCleanup   ; do not leak the socket / WSAStartup

@btnShow_ClickRequest:
        invoke  SendHttpHead, addr g_szURL, NULL

        ;--- Read the first chunk; it starts with the HTTP response header
        invoke  ReadSockData, addr g_buf, c_BufLen

        ;--- Look for the CR LF CR LF that ends the header.
        ;    ecx is the last position where a 4-byte read stays inside g_buf;
        ;    reaching it means "not found" (the final buffer byte is always 0
        ;    because ReadSockData receives at most c_BufLen-1 bytes).
        mov     eax, offset g_buf
        lea     ecx, [eax + c_BufLen - 4]
        .while (eax < ecx) && (dword ptr [eax] != 0a0d0a0dh)
            inc     eax
        .endw
        .if eax < ecx
            add     eax, 3
            mov     byte ptr [eax], 0   ; overwrite the final LF: terminates the header text
            inc     eax                 ; eax -> first byte after the header
        .else
            xor     eax, eax            ; no header end in this chunk
        .endif
        mov     lpBody, eax

        ;--- Show the header, then any body bytes that arrived with it
        m_InsCrStr addr g_buf
        cmp     lpBody, 0
        je      @btnShow_ClickReadBody
        m_InsStr lpBody

@btnShow_ClickReadBody:
        ;--- Read the next piece of the body
        invoke  ReadSockData, addr g_buf, c_BufLen
        mov     nRead, eax

        ;--- Cut the text at the end mark string when it is present
        invoke  SearchStr, addr g_buf, addr g_szHTTP400
        .if eax != -1
            mov     byte ptr [g_buf + eax], 0
        .endif

        m_InsCrStr addr g_buf

        ;--- Continue while the last recv delivered data (buffer was filled)
        mov     eax, nRead
        cmp     eax, SOCKET_ERROR
        je      @btnShow_ClickCleanup
        test    eax, eax
        jnz     @btnShow_ClickReadBody

@btnShow_ClickCleanup:
        invoke  closesocket, sock
        invoke  WSACleanup
        .if eax == SOCKET_ERROR
            invoke  WSAGetLastError
            invoke  wsprintf, addr g_buf, addr g_szFmt2, eax
            m_InsCrStr addr g_buf
        .endif

@btnShow_ClickRet:
        ret
btnShow_Click endp
;-----------------------------------------------------------------------
; fInitInternet - starts Winsock (version 1.1 requested) and creates a TCP
;                 stream socket, stored in the global "sock".
; In:    hWnd = not used at present (only referenced by a disabled
;               WSAAsyncSelect call in earlier versions)
; Out:   eax = 0 on success,
;              otherwise the address of an error message string
; Note:  when socket() fails, the successful WSAStartup is now balanced with
;        WSACleanup so a failed attempt leaves no Winsock reference behind.
;-----------------------------------------------------------------------
fInitInternet proc hWnd:dword
        invoke  WSAStartup, 101h, addr wsadata
        test    eax, eax
        jz      @fInitInternetSocket
        mov     eax, offset g_szFailWSAStartup
        jmp     @fInitInternetRet

@fInitInternetSocket:
        ;--- Create a stream socket for internet use
        invoke  socket, AF_INET, SOCK_STREAM, 0
        .if eax != INVALID_SOCKET
            mov     sock, eax
            xor     eax, eax            ; success
        .else
            invoke  WSACleanup          ; undo WSAStartup
            mov     eax, offset g_szFailIni
        .endif

@fInitInternetRet:
        ret
fInitInternet endp
;-----------------------------------------------------------------------
; fConnect - resolves a host name and connects the global socket "sock"
;            to it.
; In:    lpszHostName = host name passed to gethostbyname
;        nPortNumber  = port in host byte order (converted with htons)
; Out:   eax = result of connect() when it is not SOCKET_ERROR,
;              otherwise the address of an error message string
; Uses:  fills the global sockaddr_in "sin"
;-----------------------------------------------------------------------
fConnect proc uses esi edi lpszHostName:dword, nPortNumber:dword
        invoke  gethostbyname, lpszHostName
        .if eax == NULL
            mov     eax, offset g_szFailGetHostName
            ret
        .endif

        ;--- Take the first entry of the host's address list
        mov     eax, (hostent ptr [eax]).h_list
        mov     eax, [eax]              ; eax -> first address
        mov     eax, [eax]              ; eax = the 4 address bytes
        mov     sin.sin_addr.S_un.S_addr, eax
        mov     sin.sin_family, AF_INET

        invoke  htons, nPortNumber
        mov     sin.sin_port, ax

        invoke  connect, sock, addr sin, sizeof sin
        cmp     eax, SOCKET_ERROR
        jne     @fConnectDone
        invoke  WSAGetLastError         ; result is not used further
        mov     eax, offset g_szConnect
@fConnectDone:
        ret
fConnect endp
;-----------------------------------------------------------------------
; SendHttpHead - formats the HTTP request from g_szFmt1 into g_buf, shows
;                it in the output control and sends it on "sock".
; In:    lpszHostName = host name for the Host header
;        lpParam      = path placed after "GET /"; NULL means "no path"
; Out:   eax = result of send()
; Fixes: only the formatted request is sent (previously sizeof g_buf bytes,
;        i.e. the request plus whatever else was in the buffer), and a NULL
;        lpParam is replaced by an empty string instead of being handed to
;        the %s conversion.
;-----------------------------------------------------------------------
SendHttpHead proc lpszHostName:LPSTR, lpParam:LPSTR
        LOCAL   cbRequest:DWORD         ; length of the formatted request
        LOCAL   chEmpty:BYTE            ; empty string used when lpParam is NULL

        mov     chEmpty, 0
        mov     eax, lpParam
        .if eax == NULL
            lea     eax, chEmpty
        .endif

        invoke  wsprintf, addr g_buf, addr g_szFmt1, eax, lpszHostName
        mov     cbRequest, eax          ; wsprintf returns the number of characters stored

        m_InsStr addr g_buf

        invoke  send, sock, addr g_buf, cbRequest, 0
        ret
SendHttpHead endp
;-----------------------------------------------------------------------
; ReadSockData - clears the buffer and fills it from the global socket
;                "sock" until dwMax_buf_len-1 bytes were received or recv
;                stops delivering data.
; In:    lpszBuffer    = destination buffer
;        dwMax_buf_len = size of the buffer in bytes; the last byte is never
;                        written, so the data stays zero-terminated
; Out:   eax = result of the last recv call:
;              > 0          the buffer was filled completely
;              0            recv returned 0 (or dwMax_buf_len was 0)
;              SOCKET_ERROR recv failed
; Fixes: esi/edi are preserved (stdcall callee-saved registers), the recv
;        result is checked before it is used to advance the pointer, and a
;        zero-length buffer no longer leads to a recv with length -1.
;-----------------------------------------------------------------------
ReadSockData proc uses esi edi lpszBuffer:LPSTR, dwMax_buf_len:dword
        invoke  RtlZeroMemory, lpszBuffer, dwMax_buf_len

        xor     eax, eax
        mov     esi, dwMax_buf_len
        test    esi, esi
        jz      @ReadSockDataRet        ; no room at all
        dec     esi                     ; esi = bytes still free (keeps the final 0)
        mov     edi, lpszBuffer         ; edi = next write position

@ReadSockDataNext:
        invoke  recv, sock, edi, esi, 0
        cmp     eax, 0
        jle     @ReadSockDataRet        ; 0 or SOCKET_ERROR: stop, return it
        add     edi, eax
        sub     esi, eax
        jnz     @ReadSockDataNext       ; room left: keep reading

@ReadSockDataRet:
        ret
ReadSockData endp
;-----------------------------------------------------------------------
; SearchStr - finds the first occurrence of a zero-terminated string inside
;             another zero-terminated string (case-sensitive byte compare).
; In:    lpszOrgStr = text to search in
;        lpszSubStr = text to search for
; Out:   eax = byte offset of the first match from lpszOrgStr,
;              or -1 when there is no match or lpszSubStr is empty
; Clobb: ecx, edx, flags (esi/edi are saved and restored)
; Fix:   esi and edi are callee-saved under stdcall and are now preserved.
;-----------------------------------------------------------------------
SearchStr proc uses esi edi lpszOrgStr:LPSTR, lpszSubStr:LPSTR
        mov     edx, lpszSubStr         ; edx = pattern start
        cmp     byte ptr [edx], 0
        je      @SearchStrNotFound      ; an empty pattern never matches

        mov     ecx, lpszOrgStr         ; ecx = current candidate position

@SearchStrNextStart:
        cmp     byte ptr [ecx], 0
        je      @SearchStrNotFound      ; text exhausted
        mov     esi, ecx                ; esi walks the text
        mov     edi, edx                ; edi walks the pattern

@SearchStrCompare:
        mov     al, [edi]
        test    al, al
        jz      @SearchStrFound         ; whole pattern matched
        cmp     al, [esi]               ; the text's terminating 0 counts as a mismatch
        jne     @SearchStrMismatch
        inc     esi
        inc     edi
        jmp     @SearchStrCompare

@SearchStrMismatch:
        inc     ecx                     ; try the next start position
        jmp     @SearchStrNextStart

@SearchStrFound:
        mov     eax, ecx
        sub     eax, lpszOrgStr         ; pointer -> offset
        ret

@SearchStrNotFound:
        mov     eax, -1
        ret
SearchStr endp
if c_Resize eq 1
;-----------------------------------------------------------------------
; ResizeConctrol - repositions the three child controls for a new size.
; In:    wh = packed size: low word = width, high word = height
;             (called with the lParam of WM_SIZE)
; Out:   eax = result of the last MoveWindow call
;-----------------------------------------------------------------------
ResizeConctrol PROC wh:DWORD
        LOCAL   xPos:DWORD              ; running horizontal value, starts as the width

        mov     ecx, wh
        movzx   eax, cx                 ; eax = width
        mov     xPos, eax
        shr     ecx, 16                 ; ecx = height

        ;--- Output control: width - 4, height - (URL box height + 10)
        sub     eax, 4
        sub     ecx, c_EditURLHeight+10
        invoke  MoveWindow, g_hEditVerInfo, 2, c_EdtHTMLTop, eax, ecx, FALSE

        ;--- Read button: left edge at width - (5 + button width)
        mov     eax, xPos
        sub     eax, 5+c_BtnReadWidth
        mov     xPos, eax
        invoke  MoveWindow, g_hBtnRead, eax, c_BtnReadTop, \
                c_BtnReadWidth, c_BtnReadHeight, TRUE

        ;--- URL edit box: its width is the button's left edge minus 10
        mov     eax, xPos
        sub     eax, 10
        invoke  MoveWindow, g_hEditURL, c_EditURLLeft, c_EditURLTop, \
                eax, c_EditURLHeight, TRUE
        ret
ResizeConctrol ENDP
endif ; c_Resize
; Name the entry point so the linker knows where execution begins.
end start