域名解析
URL:统一资源定位符
http://www.sina.com.cn/web/index.html
- http:// - 协议
- www.sina.com.cn - 域名
- /web/index.html - 路径
DNS - 域名解析服务
www.sina.com.cn -> 202.60.121.55, ...
...
#include <netdb.h>
struct hostent* gethostbyname (char const* name);
成功返回主机条目信息结构指针,失败返回NULL,错误原因保存在h_errno(而非errno)中。
hostent
h_name - 字符指针,指向主机官方名字符串
h_aliases - 指向字符指针数组的指针,该数组中的每个元素都是字符指针,指向一个别名字符串,最后一个元素是一个NULL指针
h_addrtype - 地址类型,AF_INET(IPv4)
h_length - 地址字节数, 4字节(IPv4)
h_addr_list - 指向结构体指针数组的指针,该数组中的每个元素都指向一个struct in_addr类型的结构体,其中存放着主机一个IP地址,最后一个元素是一个空指针
#include <arpa/inet.h>
char* inet_ntoa (struct in_addr addr);
代码示例
- dns.c
#include <netdb.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Map an h_errno value left behind by gethostbyname() to a short
 * human-readable description.
 */
static char const* lookup_error_text (int code) {
    switch (code) {
    case HOST_NOT_FOUND:
        return "unknown host";
    case TRY_AGAIN:
        return "temporary name server failure, try again later";
    case NO_RECOVERY:
        return "unrecoverable name server error";
    case NO_DATA:
        return "host name is valid but has no address";
    default:
        return "unknown resolver error";
    }
}

/*
 * dns.c - resolve the host name given in argv[1] and print its official
 * name, its alias list and its IPv4 address list.
 *
 * Returns EXIT_FAILURE when the argument is missing or the lookup fails,
 * EXIT_SUCCESS otherwise. An entry whose address type is not AF_INET is
 * silently skipped, as in the original example.
 */
int main (int argc, char* argv[]) {
    if (argc < 2) {
        printf ("用法:%s <主机域名>\n",
            argv[0]);
        return EXIT_FAILURE;
    }

    struct hostent* host = gethostbyname (argv[1]);
    if (! host) {
        /* gethostbyname() reports failures through h_errno, not errno,
           so perror() would print an unrelated message here. */
        fprintf (stderr, "gethostbyname: %s\n",
            lookup_error_text (h_errno));
        return EXIT_FAILURE;
    }

    if (host->h_addrtype == AF_INET) {
        printf ("主机官方名:\n");
        printf ("\t%s\n", host->h_name);

        /* h_aliases is a NULL-terminated array of C strings. */
        printf ("主机别名表:\n");
        for (char** alias = host->h_aliases; *alias; ++alias) {
            printf ("\t%s\n", *alias);
        }

        /* For AF_INET every h_addr_list element points to a
           struct in_addr; the array is NULL-terminated. */
        printf ("主机地址表:\n");
        struct in_addr** addrs = (struct in_addr**) host->h_addr_list;
        for (; *addrs; ++addrs) {
            printf ("\t%s\n", inet_ntoa (**addrs));
        }
    }

    return EXIT_SUCCESS;
}
- 执行结果
- 请求
GET /web/index.html HTTP/1.0<CR><NL>
Host: www.sina.com.cn
Accept: */*
Connection: Close/Keep-Alive
User-Agent: Mozilla/5.0
Referer: www.sina.com.cn<CR><NL><CR><NL>
- 响应
HTTP/1.0 200 OK
Server: nginx
Date: Wed, 26 Oct 2016 10:52:04 GMT
Content-Type: text/html;charset=UTF-8
Content-length: 1234
Connection: Close/Keep-Alive<CR><NL><CR><NL>
<html>
<head> ... </head>
<body> ... </body>
</html>
代码示例
- http.c
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <strings.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * http.c - minimal HTTP/1.0 client.
 *
 * argv[1]: dotted-decimal IPv4 address of the server
 * argv[2]: host name sent in the Host and Referer headers
 * argv[3]: optional resource path, without the leading '/'
 *
 * Connects to port 80, sends a GET request and copies the raw response
 * (headers and body) to stdout. Returns EXIT_SUCCESS once the peer has
 * closed the connection, EXIT_FAILURE on any error.
 */
int main (int argc, char* argv[]) {
    if (argc < 3) {
        printf ("用法:%s <主机地址> "
            "<主机域名> [<资源路径>]\n",
            argv[0]);
        return EXIT_FAILURE;
    }
    char const* ip = argv[1];
    char const* domain = argv[2];
    char const* path = argc < 4 ? "" : argv[3];

    int sockfd = socket (PF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror ("socket");
        return EXIT_FAILURE;
    }

    /* From here on every exit goes through `done` so that the socket
       is closed on error paths too. */
    int status = EXIT_FAILURE;

    struct sockaddr_in addr;
    memset (&addr, 0, sizeof (addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons (80);
    if (! inet_aton (ip, &addr.sin_addr)) {
        /* inet_aton() does not set errno, so perror() would be
           misleading; report the bad input directly. */
        fprintf (stderr, "inet_aton: invalid IPv4 address: %s\n", ip);
        goto done;
    }

    if (connect (sockfd, (struct sockaddr*)&addr,
        sizeof (addr)) == -1) {
        perror ("connect");
        goto done;
    }

    /* path and domain come from the command line: bound the write and
       refuse to send a truncated request. */
    char request[1024];
    int reqlen = snprintf (request, sizeof (request),
        "GET /%s HTTP/1.0\r\n"
        "Host: %s\r\n"
        "Accept: */*\r\n"
        "Connection: Close\r\n"
        "User-Agent: Mozilla/5.0\r\n"
        "Referer: %s\r\n\r\n",
        path, domain, domain);
    if (reqlen < 0 || (size_t)reqlen >= sizeof (request)) {
        fprintf (stderr, "request too long (limit %zu bytes)\n",
            sizeof (request) - 1);
        goto done;
    }

    /* send() may accept fewer bytes than requested; loop until the
       whole request has been handed to the kernel. */
    size_t sent = 0;
    while (sent < (size_t)reqlen) {
        ssize_t n = send (sockfd, request + sent,
            (size_t)reqlen - sent, 0);
        if (n == -1) {
            perror ("send");
            goto done;
        }
        sent += (size_t)n;
    }

    for (;;) {
        char respond[1024];
        ssize_t rlen = recv (sockfd, respond, sizeof (respond), 0);
        if (rlen == -1) {
            perror ("recv");
            goto done;
        }
        if (! rlen)
            break; /* peer closed the connection: response complete */
        /* Write exactly rlen bytes; "%s" would stop at an embedded
           NUL byte in the response. */
        fwrite (respond, 1, (size_t)rlen, stdout);
    }
    printf ("\n");
    status = EXIT_SUCCESS;

done:
    close (sockfd);
    return status;
}
- 执行结果
包含头文件
#include <regex.h>
- regcomp - 编译正则表达式
- regexec - 执行正则匹配
- regfree - 释放正则表达式内存
... href=" http://www.sina.com.cn/web/index.html " ...
href="\s*\([^ >"]*\)\s*"
\s - 匹配任意空白字符(空格、制表、回车、换行)
* - 重复前一个匹配项任意次
[^ >"] - 匹配除空格、大于号、双引号以外的任意字符
\(和\) - 定义子表达式
代码示例
- regex.c
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * regex.c - print the target of every href="..." attribute found in the
 * HTML file named by argv[1], one URL per line.
 *
 * The whole file is read into memory and scanned repeatedly with a POSIX
 * basic regular expression whose first sub-expression captures the URL
 * without surrounding blanks. Returns EXIT_SUCCESS after a complete
 * scan, EXIT_FAILURE on any I/O, allocation or regex error.
 */
int main (int argc, char* argv[]) {
    if (argc < 2) {
        printf ("用法:%s <HTML文件>\n",
            argv[0]);
        return EXIT_FAILURE;
    }

    FILE* fp = fopen (argv[1], "r");
    if (! fp) {
        perror ("fopen");
        return EXIT_FAILURE;
    }

    /* All later exits go through `out`, which releases whatever is
       still held (open file, text buffer). */
    int status = EXIT_FAILURE;
    char* buf = NULL;

    /* Determine the file size by seeking to the end. */
    if (fseek (fp, 0, SEEK_END) == -1) {
        perror ("fseek");
        goto out;
    }
    long size = ftell (fp);
    if (size == -1) {
        perror ("ftell");
        goto out;
    }
    if (fseek (fp, 0, SEEK_SET) == -1) {
        perror ("fseek");
        goto out;
    }

    buf = malloc ((size_t)size + 1);
    if (! buf) {
        perror ("malloc");
        goto out;
    }
    if (fread (buf, 1, (size_t)size, fp) != (size_t)size) {
        if (ferror (fp))
            perror ("fread");
        else
            fprintf (stderr, "fread: unexpected end of file\n");
        goto out;
    }
    buf[size] = '\0'; /* regexec() needs a NUL-terminated string */
    fclose (fp);
    fp = NULL;

    regex_t ex;
    int error = regcomp (&ex,
        "href=\"\\s*\\([^ >\"]*\\)\\s*\"", 0);
    if (error) {
        char errInfo[1024];
        regerror (error, &ex, errInfo, sizeof (errInfo));
        printf ("regcomp: %s\n", errInfo);
        goto out;
    }

    char const* html = buf;
    regmatch_t match[2];
    /* Continue only on a real match (return value 0). Any other value,
       REG_NOMATCH or an error such as REG_ESPACE, leaves match[]
       unspecified and must end the scan. */
    while (regexec (&ex, html, 2, match, 0) == 0) {
        /* match[1] is the captured URL, match[0] the whole attribute;
           offsets are relative to the current scan position. */
        size_t len = (size_t)(match[1].rm_eo - match[1].rm_so);
        fwrite (html + match[1].rm_so, 1, len, stdout);
        putchar ('\n');
        /* Resume right after the closing quote of this attribute. */
        html += match[0].rm_eo;
    }
    regfree (&ex);
    status = EXIT_SUCCESS;

out:
    if (fp)
        fclose (fp);
    free (buf);
    return status;
}
- 执行结果