概念:
- cURL:Client URL Library Function
- 官方定义为:使用URL语法传输数据的命令行工具。
- 通俗解释:cURL 是客户端向服务器请求资源的工具
可以做什么:
- 网页资源 —— 编写网页爬虫
- WebService数据接口资源 —— 动态获取接口数据,比如天气、号码归属地等
- FTP服务器里面的文件资源 —— 下载FTP服务器里面的文件
- 其他资源 —— 所有网络上的资源都可以使用cURL访问和下载
在PHP中使用cURL
EXAMPLE-1-爬虫显示一个页面
1 2 3 4 5 |
<?php
// Request the Baidu home page. CURLOPT_RETURNTRANSFER is not set, so
// curl_exec() writes the response straight to the output.
$handle = curl_init();
curl_setopt($handle, CURLOPT_URL, 'http://www.baidu.com');
curl_exec($handle);
curl_close($handle);
?>
EXAMPLE-2-修改页面内容
1 2 3 4 5 6 7 8 |
<?php
// Fetch a page into a string, rewrite part of the text, then print it.
$curlobj = curl_init();                                      // create the handle
curl_setopt($curlobj, CURLOPT_URL, "http://www.baidu.com");  // page to request
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);         // return the body instead of printing it

$output = curl_exec($curlobj);
if ($output === false) {
    // curl_exec() returns false on failure; report it instead of passing
    // false on to str_replace().
    echo 'Curl error: ' . curl_error($curlobj);
} else {
    echo str_replace("百度", "屌丝", $output);
}
curl_close($curlobj);                                        // release the handle
?>
EXAMPLE-3-爬虫获取天气数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
<?php
// Query a weather web service for one city with an HTTP form POST.

// http_build_query() percent-encodes the non-ASCII city name, which an
// application/x-www-form-urlencoded body requires.
$data = http_build_query(array('theCityName' => '北京'));

// HTTP_USER_AGENT is only present when the script is run for a web request;
// fall back to a fixed string (e.g. when run from the command line).
$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : 'PHP-cURL-demo';

$curlobj = curl_init();
curl_setopt($curlobj, CURLOPT_URL, "http://www.webxml.com.cn/WebServices/WeatherWebService.asmx/getWeatherbyCityName");
curl_setopt($curlobj, CURLOPT_HEADER, 0);               // do not include response headers in the output
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, 1);       // return the body instead of printing it
curl_setopt($curlobj, CURLOPT_POST, 1);                 // send a POST request
curl_setopt($curlobj, CURLOPT_USERAGENT, $userAgent);   // identify the client
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);       // POST body
// Each entry must be a complete "Name: value" header line. Content-Length is
// left to libcurl, which derives it from CURLOPT_POSTFIELDS.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
));

$rtn = curl_exec($curlobj);
if (!curl_errno($curlobj)) {
    echo $rtn;
} else {
    echo 'Curl error' . curl_error($curlobj);
}
curl_close($curlobj);
?>
EXAMPLE-4-ftp下载一个文件到本地
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
<?php
// Download a file from an FTP server directly into a local file.

// Local destination file; opened in binary mode so the bytes are written unchanged.
$outfile = fopen('dest.txt', 'wb');
if ($outfile === false) {
    echo 'Cannot open dest.txt for writing';
} else {
    $curlobj = curl_init();
    curl_setopt($curlobj, CURLOPT_URL, "ftp://192.168.1.100/downloaddemo.txt");
    curl_setopt($curlobj, CURLOPT_HEADER, 0);
    curl_setopt($curlobj, CURLOPT_TIMEOUT, 300);                  // give up after 300 seconds
    curl_setopt($curlobj, CURLOPT_USERPWD, "peter.zhou:123456");  // FTP "user:password" (demo credentials)
    // The body is written to $outfile. CURLOPT_RETURNTRANSFER is intentionally
    // not set: it conflicts with CURLOPT_FILE, and curl_exec() then only
    // returns a success flag.
    curl_setopt($curlobj, CURLOPT_FILE, $outfile);

    $rtn = curl_exec($curlobj);
    fclose($outfile);

    if (!curl_errno($curlobj)) {
        // $info = curl_getinfo($curlobj); print_r($info);  // uncomment to inspect transfer details
        echo "RETURN: " . $rtn;
    } else {
        echo 'Curl error: ' . curl_error($curlobj);
    }
    curl_close($curlobj);
}
?>
EXAMPLE-5-ftp文件上传
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
<?php
// Upload a local file to an FTP server.
$localfile = 'ftp01.php';

// Binary mode keeps the uploaded bytes identical to the file on disk.
$fp = fopen($localfile, 'rb');
$size = ($fp === false) ? false : filesize($localfile);

if ($fp === false || $size === false) {
    if ($fp !== false) {
        fclose($fp);
    }
    echo 'Cannot read ' . $localfile;
} else {
    $curlobj = curl_init();
    curl_setopt($curlobj, CURLOPT_URL, "ftp://192.168.1.100/ftp01_uploaded.php");
    curl_setopt($curlobj, CURLOPT_HEADER, 0);
    curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, 1);             // do not print the server response
    curl_setopt($curlobj, CURLOPT_TIMEOUT, 300);                  // give up after 300 seconds
    curl_setopt($curlobj, CURLOPT_USERPWD, "peter.zhou:123456");  // FTP "user:password" (demo credentials)

    curl_setopt($curlobj, CURLOPT_UPLOAD, 1);                     // this transfer sends data
    curl_setopt($curlobj, CURLOPT_INFILE, $fp);                   // stream to read the upload from
    curl_setopt($curlobj, CURLOPT_INFILESIZE, $size);             // number of bytes to send

    curl_exec($curlobj);
    fclose($fp);

    if (!curl_errno($curlobj)) {
        echo "Uploaded successfully.";
    } else {
        echo 'Curl error: ' . curl_error($curlobj);
    }
    curl_close($curlobj);
}
?>
EXAMPLE-6-登录网站获取个人信息
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
<?php
// Log in with a form POST, then fetch a page that needs the login session,
// following redirects manually with curl_redir_exec().
$data = 'username=demo_peter@126.com&password=123qwe&remember=1';

$curlobj = curl_init();
curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/user/login");  // login endpoint
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);                    // return the body instead of printing it

// Cookie setup; this has to be in place before the first request is sent.
// NOTE(review): the original author sets the timezone before using cookies;
// confirm whether this is still required.
date_default_timezone_set('PRC');
curl_setopt($curlobj, CURLOPT_COOKIESESSION, true);
// An empty CURLOPT_COOKIEFILE enables libcurl's in-memory cookie engine, so the
// cookies set by the login response are sent again on the next request made
// with this handle. Without it no cookies are kept.
curl_setopt($curlobj, CURLOPT_COOKIEFILE, '');
curl_setopt($curlobj, CURLOPT_HEADER, 0);
// CURLOPT_FOLLOWLOCATION is deliberately not used: per the original author it
// requires safe mode and open_basedir to be turned off, which weakens server
// security. Redirects are followed by curl_redir_exec() instead.
curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line. Content-Length is
// left to libcurl, which derives it from CURLOPT_POSTFIELDS.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
));

if (curl_exec($curlobj) === false) {
    echo 'Curl error: ' . curl_error($curlobj);
    curl_close($curlobj);
} else {
    // Second request on the same handle: switch back to GET and replace the
    // headers that were set for the login POST.
    curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/space/index");
    curl_setopt($curlobj, CURLOPT_HTTPGET, true);
    curl_setopt($curlobj, CURLOPT_HTTPHEADER, array("Content-type: text/xml"));
    $output = curl_redir_exec($curlobj);
    curl_close($curlobj);
    echo $output;
}

/**
 * Execute the request configured on $ch and follow HTTP redirects manually.
 *
 * The handle is modified: CURLOPT_HEADER and CURLOPT_RETURNTRANSFER are
 * switched on (the headers are needed to read the redirect target) and
 * CURLOPT_URL is changed on every redirect. The request method is not
 * changed; the caller chooses it before calling.
 *
 * @param resource|CurlHandle $ch    Configured cURL handle.
 * @param string              $debug Unused; kept so existing callers keep working.
 *
 * @return string|false Body of the first non-redirect response (or of a
 *                      redirect response whose Location cannot be used);
 *                      false if a transfer fails or if 20 requests were made
 *                      without reaching a final response.
 */
function curl_redir_exec($ch, $debug = "")
{
    $max_requests = 20;

    curl_setopt($ch, CURLOPT_HEADER, true);          // headers are needed to read Location
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    for ($request = 0; $request < $max_requests; $request++) {
        $response = curl_exec($ch);
        if ($response === false) {
            return false;
        }

        // Split the raw response into header block and body. The body starts
        // exactly at the header size; the cast covers an empty body, for
        // which older PHP versions return false from substr().
        $h_len  = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $header = substr($response, 0, $h_len);
        $body   = (string) substr($response, $h_len);

        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        if (!in_array($http_code, array(301, 302, 303, 307, 308), true)) {
            return $body;
        }

        // Header names are case-insensitive; match one whole header line.
        $matches = array();
        if (!preg_match('/^Location:[ \t]*(.*?)[ \t]*\r?$/mi', $header, $matches) || $matches[1] === '') {
            // Redirect status without a usable target: nothing to follow.
            return $body;
        }

        $url = parse_url($matches[1]);
        if (!$url) {
            // The redirect target could not be parsed.
            return $body;
        }

        // Fill in whatever the Location value leaves out from the URL that
        // was just requested.
        $last_url = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
        $last_path = isset($last_url['path']) ? $last_url['path'] : '/';

        $scheme = isset($url['scheme']) ? $url['scheme'] : $last_url['scheme'];
        if (isset($url['host'])) {
            $host = $url['host'];
            $port = isset($url['port']) ? $url['port'] : null;
        } else {
            $host = $last_url['host'];
            $port = isset($last_url['port']) ? $last_url['port'] : null;
        }

        if (!isset($url['path'])) {
            $path = $last_path;
        } elseif ($url['path'] === '' || $url['path'][0] !== '/') {
            // Relative path: resolve it against the directory of the last URL.
            $slash = strrpos($last_path, '/');
            $path = ($slash === false ? '/' : substr($last_path, 0, $slash + 1)) . $url['path'];
        } else {
            $path = $url['path'];
        }

        $new_url = $scheme . '://' . $host
            . ($port !== null ? ':' . $port : '')
            . $path
            . (isset($url['query']) ? '?' . $url['query'] : '');

        curl_setopt($ch, CURLOPT_URL, $new_url);
    }

    // Request limit reached without a final response.
    return false;
}
?>