用户工具

站点工具


nginx_强过防火墙

Nginx:强过防火墙

  • 要实现的目标:优化资源利用,优化客户访问,在不影响用户体验的前提下,提供必要的防护。
  • 业务处理原则:动静分离、人机筛选、将资源尽量倾向于正常访客。
  • 本文档的主题:灵活利用Nginx及其扩展,配置一个具有防护优化功能的前端代理。

本文以常见的DiscuzX程序为范例

操作系统设置

操作系统:FreeBSD10.0+ZFS

启用httpready, aio, tmpfs

注意不要重复设置

# echo 'accf_http_load="YES"' >> /boot/loader.conf
# echo 'aio_load="YES"' >> /boot/loader.conf
# echo 'tmpfs /tmp tmpfs rw,mode=777 0 0' >> /etc/fstab
# kldload accf_http aio
# umount /tmp && mount /tmp

为Nginx建立缓存挂载点

# zfs create -o atime=off -o setuid=off -o checksum=off -o mountpoint=/var/tmp/nginx zroot/nginx
# chown www /var/tmp/nginx

Nginx配置

软件版本:Nginx1.4

编译时选择的模块

# cd /usr/ports/www/nginx && make config && make install clean
FILE_AIO
GOOGLE_PERFTOOLS
HTTP
HTTP_ADDITION
HTTP_GEOIP
HTTP_REALIP
HTTP_REWRITE
HTTP_STATUS
CACHE_PURGE
HEADERS_MORE
LUA
# mkdir /var/log/nginx

如果需要其他功能,请自行选择。

除非特指,本文中的配置文件都基于/usr/local/etc/nginx/目录,下面是nginx.conf主配置文件:

main 设置

# Main (global) context of nginx.conf.

# Account the worker processes run as, and where the pid file is written.
user www;
pid /var/run/nginx.pid;

# Number of worker processes: equal to, or a multiple of, the CPU core count.
worker_processes 4;

# Global error log, at "info" level.
error_log /var/log/nginx/nginx-error.log info;

# Base file name for profiles produced through the GOOGLE_PERFTOOLS module.
google_perftools_profiles /tmp/nginx_gperf;

events 设置

# Connection handling: use the kqueue event method and allow each worker
# up to 1024 connections.
events {
  use kqueue;
  worker_connections 1024;
}

http 设置

# http context: shared I/O tuning, log formats, temp/cache paths, rate-limit
# zones, and the two servers (public front end on :80, status on 127.0.0.1:888).
http                          {
  # File I/O: sendfile with AIO preloading, direct I/O for files of 4m and up.
  aio                         sendfile;
  sendfile                    on;
  tcp_nopush                  on;
  directio                    4m;
  directio_alignment          4096;
  # Required by front.conf, which chains several error_page redirections.
  recursive_error_pages       on;
  # Trust X-Real-IP only when the request comes from loopback or a unix socket.
  set_real_ip_from            127.0.0.0/8;
  set_real_ip_from            unix:;
  real_ip_header              X-Real-IP;
  default_type                application/octet-stream;
  server_tokens               off;
  server_name_in_redirect     off;
  keepalive_timeout           120;
  # "main" log format: pipe-separated request summary.
  log_format                  main '$remote_addr | $time_local | $host | $request | $status | '
                                   '$body_bytes_sent | $http_referer | $http_user_agent | '
                                   '$http_x_forwarded_for';
  # "cache" log format: same fields plus the upstream cache status.
  log_format                  cache '$remote_addr | $time_local | $host | $request | $status | '
                                    '$body_bytes_sent | $http_referer | $http_user_agent | '
                                    '$http_x_forwarded_for | $upstream_cache_status';
  # Temporary files live under /tmp.
  client_body_temp_path       /tmp/nginx_tmp_client;
  fastcgi_temp_path           /tmp/nginx_tmp_fcgi;
  uwsgi_temp_path             /tmp/nginx_tmp_uwsgi;
  # NOTE(review): "sgi" looks like a typo for "scgi"; harmless, it is only a path.
  scgi_temp_path              /tmp/nginx_tmp_sgi;
  proxy_temp_path             /tmp/nginx_tmp_proxy;
  # Disk cache zone "cache_disk"; entries unused for one day are dropped.
  proxy_cache_path            /var/tmp/nginx/cache levels=2:2 keys_zone=cache_disk:256m inactive=1d;
  # Per-client-address rate and connection zones used by front.conf.
  limit_req_zone              $binary_remote_addr zone=scriptzone:4m rate=10r/s;
  # NOTE(review): "staiczone" is presumably a misspelling of "staticzone"; front.conf
  # references the same spelling, so both must be renamed together if at all.
  limit_req_zone              $binary_remote_addr zone=staiczone:4m rate=30r/s;
  limit_conn_zone             $binary_remote_addr zone=clientzone:4m;
  include                     mime.types;
  include                     map.conf;
  include                     upstream.conf;
  # Public front-end server; all of its logic lives in front.conf.
  server                      {
    listen                    80 accept_filter=httpready;
    include                   front.conf;
  }
  # nginx status can be inspected with: curl http://127.0.0.1:888
  server                      {
    listen                    127.0.0.1:888;
    stub_status               on;
    access_log                off;
  }
  # When this instance also fronts sites on the same machine, put the site
  # configuration files in this directory.
  include                     site/*.conf;
}

主要的筛选和流程通过map.conf和front.conf配置文件实现。

通过map实现访客和资源归类

下面是map.conf的内容

# IP classification based on the geo module.
# There are many ways to build the IP list: use one compiled by others, or
# even write a script that runs whois on the addresses found in the logs.
# The ranges below are only an example: addresses of some offline-download
# servers. Matching clients get $geo = "oldl"; no default is given.
geo                                     $geo {
  ranges;
  101.226.180.1-101.226.180.255         oldl;
  111.161.24.1-111.161.24.255           oldl;
  112.117.217.1-112.117.217.255         oldl;
  112.90.17.1-112.90.17.255             oldl;
  114.112.202.1-114.112.202.255         oldl;
  114.80.183.1-114.80.189.255           oldl;
  114.80.245.1-114.80.245.255           oldl;
  116.55.230.1-116.55.230.255           oldl;
  117.34.91.1-117.34.91.255             oldl;
  118.122.36.1-118.122.36.255           oldl;
  118.122.87.1-118.122.88.255           oldl;
  119.120.94.1-119.120.94.255           oldl;
  119.144.9.1-119.144.9.255             oldl;
  119.147.41.1-119.147.41.255           oldl;
  119.178.12.1-119.178.12.255           oldl;
  119.188.11.1-119.188.12.255           oldl;
  119.188.129.1-119.188.129.255         oldl;
  119.188.13.1-119.188.15.255           oldl;
  119.188.50.1-119.188.50.255           oldl;
  119.189.1.1-119.189.1.255             oldl;
  119.84.114.1-119.84.114.255           oldl;
  119.97.178.1-119.97.178.255           oldl;
  119.97.183.1-119.97.183.255           oldl;
  121.10.120.1-121.10.120.255           oldl;
  121.10.137.1-121.10.137.255           oldl;
  121.10.24.1-121.10.24.255             oldl;
  121.14.222.1-121.14.222.255           oldl;
  121.14.82.1-121.14.82.255             oldl;
  121.9.209.1-121.9.209.255             oldl;
  121.9.246.1-121.9.246.255             oldl;
  122.141.227.1-122.141.227.255         oldl;
  122.141.235.1-122.141.235.255         oldl;
  122.143.1.1-122.143.6.255             oldl;
  122.228.241.1-122.228.241.255         oldl;
  122.228.255.1-122.228.255.255         oldl;
  123.129.219.1-123.129.219.255         oldl;
  123.129.242.1-123.129.242.255         oldl;
  123.183.223.1-123.183.223.255         oldl;
  124.232.148.1-124.232.148.255         oldl;
  124.95.156.1-124.95.156.255           oldl;
  124.95.172.1-124.95.172.255           oldl;
  124.95.173.1-124.95.173.255           oldl;
  124.95.174.1-124.95.174.255           oldl;
  125.221.46.1-125.221.46.255           oldl;
  125.39.148.1-125.39.150.255           oldl;
  125.39.72.1-125.39.72.255             oldl;
  125.46.42.1-125.46.42.255             oldl;
  125.78.242.1-125.78.242.255           oldl;
  125.78.247.1-125.78.247.255           oldl;
  180.153.115.1-180.153.115.255         oldl;
  180.153.91.1-180.153.91.255           oldl;
  182.118.125.1-182.118.125.255         oldl;
  182.118.13.1-182.118.18.255           oldl;
  182.140.142.1-182.140.142.255         oldl;
  183.136.156.1-183.136.156.255         oldl;
  183.60.208.1-183.60.208.255           oldl;
  183.60.209.1-183.60.209.255           oldl;
  183.63.33.1-183.63.33.255             oldl;
  183.94.216.1-183.94.219.255           oldl;
  183.94.228.1-183.94.231.255           oldl;
  183.94.237.1-183.94.239.255           oldl;
  211.137.100.1-211.137.100.255         oldl;
  211.162.73.1-211.162.73.255           oldl;
  211.98.168.1-211.98.171.255           oldl;
  218.21.68.1-218.21.68.255             oldl;
  218.26.232.1-218.26.232.255           oldl;
  218.59.144.1-218.59.144.255           oldl;
  218.6.13.1-218.6.13.255               oldl;
  218.75.172.1-218.75.172.255           oldl;
  219.129.83.1-219.129.83.255           oldl;
  219.134.132.1-219.134.132.255         oldl;
  220.113.9.1-220.113.9.255             oldl;
  220.115.240.1-220.115.240.255         oldl;
  220.249.103.1-220.249.103.255         oldl;
  221.203.179.1-221.203.179.255         oldl;
  221.204.204.1-221.204.204.255         oldl;
  221.204.220.1-221.204.220.255         oldl;
  221.215.87.1-221.215.87.255           oldl;
  221.235.189.1-221.235.189.255         oldl;
  221.235.205.1-221.235.205.255         oldl;
  221.238.25.1-221.238.25.255           oldl;
  221.4.246.1-221.4.246.255             oldl;
  221.5.8.1-221.5.8.255                 oldl;
  222.141.53.1-222.141.53.255           oldl;
  222.186.19.1-222.186.19.255           oldl;
  222.73.133.1-222.73.133.255           oldl;
  222.73.49.1-222.73.49.255             oldl;
  58.222.25.1-58.222.25.255             oldl;
  58.251.57.1-58.251.60.255             oldl;
  58.251.61.1-58.251.61.255             oldl;
  58.252.209.1-58.252.209.255           oldl;
  58.254.134.1-58.254.134.255           oldl;
  58.255.249.1-58.255.249.255           oldl;
  58.255.250.1-58.255.253.255           oldl;
  58.61.152.1-58.61.152.255             oldl;
  58.61.39.1-58.61.39.255               oldl;
  58.67.137.1-58.67.137.255             oldl;
  60.18.146.1-60.18.146.255             oldl;
  60.18.147.1-60.18.147.255             oldl;
  60.19.64.1-60.19.64.255               oldl;
  60.21.219.1-60.21.219.255             oldl;
  60.214.64.1-60.214.64.255             oldl;
  60.217.235.1-60.217.235.255           oldl;
  60.221.254.1-60.221.254.255           oldl;
  61.137.191.1-61.137.191.255           oldl;
  61.138.177.1-61.138.177.255           oldl;
  61.139.103.1-61.139.103.255           oldl;
  61.147.76.1-61.147.76.255             oldl;
  61.147.81.1-61.147.81.255             oldl;
  61.147.94.1-61.147.94.255             oldl;
  61.152.105.1-61.152.105.255           oldl;
  61.178.227.1-61.178.227.255           oldl;
  61.183.55.1-61.183.55.255             oldl;
  61.188.190.1-61.188.190.255           oldl;
  61.235.71.1-61.235.71.255             oldl;
  61.54.12.1-61.54.12.255               oldl;
}

通过user-agent映射区分蜘蛛/机器人

# Classify the client by its User-Agent header. $ifbot is "isbot" when any
# pattern below matches and the empty string otherwise. Every entry yields
# the same value, so the order of the entries does not affect the result.
map $http_user_agent $ifbot {
  default                  "";

  # Search-engine crawlers and archivers.
  "~Mediapartners-Google"  isbot;
  "~*Yahoo"                isbot;
  "~*search"               isbot;
  "~*archive"              isbot;
  "~*Spider"               isbot;
  "~*Bot"                  isbot;

  # Scripting languages and HTTP libraries.
  "~*libfetch"             isbot;
  "~*perl"                 isbot;
  "~*Python"               isbot;
  "~*Ruby"                 isbot;
  "~*Go\ http\ package"    isbot;

  # Command-line and text-mode clients, media players, link checkers.
  "~*curl"                 isbot;
  "~*Wget"                 isbot;
  "~*Lynx"                 isbot;
  "~*Sleuth"               isbot;
  "~*Player"               isbot;
}

通过uri映射动态资源

# Mark dynamic resources by the extension of the normalized URI.
# $my_filetype is "script" for the listed extensions (case-insensitive)
# and the empty string for everything else.
map $uri $my_filetype {
  default       "";

  "~*\.php$"    script;
  "~*\.pl$"     script;
  "~*\.py$"     script;
  "~*\.rb$"     script;
  "~*\.cgi$"    script;
  "~*\.fcgi$"   script;
}

通过request_uri映射rewrite出来的伪静态

# Mark rewritten pseudo-static URLs (DiscuzX style) as "f_static" so that the
# front end treats them as dynamic pages rather than cacheable static files.
#
# $request_uri contains the query string, so each pattern ends with
# (?:\?|$) instead of a bare $. With a bare $, a request such as
# /thread-1-1-1.html?x=1 would not match, would be classified as static,
# and would be cached while skipping the script rate limit and cookie check.
#
# NOTE(review): the site configuration also rewrites a generic
# "<name>-<id>.html" form to plugin.php; that form is not listed here.
# Confirm whether this is intentional.
map                                                   $request_uri $my_uritype {
  "~/topic-(.+)\.html(?:\?|$)"                        f_static;
  "~/article-([0-9]+)-([0-9]+)\.html(?:\?|$)"         f_static;
  "~/forum-(\w+)-([0-9]+)\.html(?:\?|$)"              f_static;
  "~/thread-([0-9]+)-([0-9]+)-([0-9]+)\.html(?:\?|$)" f_static;
  "~/group-([0-9]+)-([0-9]+)\.html(?:\?|$)"           f_static;
  "~/space-(username|uid)-(.+)\.html(?:\?|$)"         f_static;
  "~/blog-([0-9]+)-([0-9]+)\.html(?:\?|$)"            f_static;
  "~/(fid|tid)-([0-9]+)\.html(?:\?|$)"                f_static;
}

您也可以根据具体需要来做更多的变量映射。

用lua来搞定伪装的浏览器

cookie.conf内容 此处参考来源

# Cookie challenge for clients that claim to be browsers.
# The client must send back two cookies: rdmnum (a number) and rdmid, which
# has to equal md5("FreeBSD" .. client address .. rdmnum). If they are missing
# or wrong, both cookies are (re)issued and the client is redirected to the
# same URL; clients that do not keep cookies never get past the redirect.
#
# The cookies are issued with path=/ so that one successful challenge covers
# the whole site. Without an explicit path the browser scopes them to the
# directory of the first URL, so other paths trigger the redirect again,
# and a redirected POST loses its request body.
#
# The Lua code must not contain single quotes: it sits inside a
# single-quoted nginx string.
rewrite_by_lua '
  -- Reuse the number sent by the client, or pick a new one.
  local rdmnum = ngx.var.cookie_rdmnum
  if rdmnum == nil then
    rdmnum = math.random(999999)
  end

  -- Token the client is expected to present for this address and number.
  local rdmid = ngx.md5("FreeBSD" .. ngx.var.remote_addr .. rdmnum)

  if ngx.var.cookie_rdmid ~= rdmid then
    ngx.header["Set-Cookie"] = {
      "rdmid=" .. rdmid .. "; path=/",
      "rdmnum=" .. rdmnum .. "; path=/",
    }
    return ngx.redirect(ngx.var.scheme .. "://" .. ngx.var.host .. ngx.var.request_uri)
  end
';

front.conf内容及流程简介

# Server-level settings of the front-end proxy (included from nginx.conf).
access_log                 /var/log/nginx/front-access.log main;
error_log                  /var/log/nginx/front-error.log;
# Add a Vary header to work around gzip detection problems in browsers.
more_set_headers           'Vary: Accept-Encoding, User-Agent';
# Throttle download streams: after the first 8m, limit the rate to 20k.
limit_rate_after           8m;
limit_rate                 20k;
# Limit overall requests and connections per client address.
# The zone "staiczone" is declared in nginx.conf with rate=30r/s; on top of
# that a burst of 10 requests is accepted without delay.
# Adjust these numbers to the actual traffic of the site.
limit_req                  zone=staiczone burst=10 nodelay;
limit_conn                 clientzone 10;
# The cache-purge location is placed first so that purging keeps working.
# NOTE(review): no allow/deny rule is visible here; purging is gated only by
# the cookie challenge from cookie.conf and the limits below. Confirm that
# this is the intended access control.
location                   ~ /purge(/.*) {
  access_log               /var/log/nginx/purge-disk.log main;
  limit_conn               clientzone 1;
  limit_req                zone=scriptzone;
  include                  cookie.conf;
  # Key to purge from zone cache_disk: host + captured path + query string.
  proxy_cache_purge        cache_disk $host$1$is_args$args;
}

location / {
  # Dispatch table. The odd status codes below never reach the visitor:
  # error_page re-routes them to named locations, and the status actually
  # sent is whatever the final location produces. This is why the http
  # block enables recursive_error_pages.
  error_page 481 = @human;
  error_page 482 = @isbot;

  # Offline-download servers are logged separately and redirected to
  # their own address.
  if ( $geo = "oldl" ) {
    access_log /var/log/nginx/block-badip.log main;
    return 301 $scheme://$remote_addr$request_uri;
  }

  # Crawlers and robots take the 482 route.
  if ( $ifbot = "isbot" ) {
    return 482;
  }

  # Everybody else is treated as a browser.
  return 481;
}

# Browser traffic (code 481 from "location /") is handled here.
# Anything that looks dynamic is re-routed to @script through code 483;
# the remainder is treated as static content and served through the cache.
location @human {
  internal;
  error_log  /var/log/nginx/human-static-error.log;
  access_log /var/log/nginx/human-static-access.log cache;

  error_page 483 = @script;

  # Only GET and HEAD, matched exactly, may use the cached static path.
  # The pattern is anchored: an unanchored (GET|HEAD) would also accept any
  # method name that merely contains those letters.
  if ( $request_method !~ ^(GET|HEAD)$ ) {
    return 483;
  }
  # Script extensions (see map $my_filetype).
  if ( $my_filetype = "script" ) {
    return 483;
  }
  # Rewritten pseudo-static pages (see map $my_uritype).
  if ( $my_uritype = "f_static" ) {
    return 483;
  }
  # Directory-style URLs: ending in "/" or containing "/?".
  if ( $request_uri ~ (/$|/\?) ) {
    return 483;
  }

  include proxy.conf;

  # The checks above have already diverted dynamic resources, so caching is
  # safe here. proxy_cache can be dropped when proxying only to this host.
  proxy_cache cache_disk;

  # Cookies sent with requests for static resources serve no purpose and are
  # said to hurt the cache hit rate (not measured); removing them is safe.
  more_clear_input_headers 'Cookie';

  proxy_pass http://backend;
}
# Dynamic requests from browsers (code 483 from @human).
location @script {
  internal;
  error_log  /var/log/nginx/human-script-error.log;
  access_log /var/log/nginx/human-script-access.log main;

  # Tighter rate limit for dynamic resources.
  limit_req zone=scriptzone burst=3 nodelay;

  # Challenge clients that only pretend to be browsers, except for Discuz
  # uploads made through Flash. The regex must not start with "~": the
  # operator is already "!~", and a literal "~" inside the quotes made the
  # pattern unmatchable, so the upload exception never applied.
  if ( $request_uri !~ "mod\=swfupload\&action\=swfupload" ) {
    include cookie.conf;
  }

  include proxy.conf;
  proxy_pass http://backend;
}

# Crawler and robot traffic (code 482 from "location /") is handled here.
# Dynamic-looking requests are re-routed to @botscript through code 484;
# everything else is served through the cache.
location @isbot {
  internal;
  error_log  /var/log/nginx/bot-static-error.log;
  access_log /var/log/nginx/bot-static-access.log cache;

  error_page 484 = @botscript;

  # Robots may only fetch: any method other than exactly GET or HEAD is
  # logged and refused. The pattern is anchored so that a method name merely
  # containing those letters does not pass.
  if ( $request_method !~ ^(GET|HEAD)$ ) {
    access_log /var/log/nginx/bot-block.log main;
    return 403;
  }
  # Script extensions (see map $my_filetype).
  if ( $my_filetype = "script" ) {
    return 484;
  }
  # Rewritten pseudo-static pages (see map $my_uritype).
  if ( $my_uritype = "f_static" ) {
    return 484;
  }
  # Directory-style URLs: ending in "/" or containing "/?".
  if ( $request_uri ~ (/$|/\?) ) {
    return 484;
  }

  include proxy.conf;

  # proxy_cache can be dropped when proxying only to this host.
  # Upstream caching headers are ignored so that responses are cached anyway.
  proxy_ignore_headers     Set-Cookie Expires Cache-Control X-Accel-Expires X-Accel-Redirect;
  proxy_cache              cache_disk;
  more_clear_input_headers 'Cookie';
  proxy_pass               http://backend;
}
# Dynamic requests from robots (code 484 from @isbot).
location @botscript {
  internal;
  access_log /var/log/nginx/bot-script-access.log cache;
  error_log  /var/log/nginx/bot-script-error.log;

  # Keep robots within bounds when they hit dynamic resources.
  limit_req  zone=scriptzone;
  limit_conn clientzone 10;

  include proxy.conf;

  # Robots are served from the cache as well. They crawl page by page, but
  # several engines crawling at once often slow the server down; caching
  # avoids that. Cookies are stripped and upstream caching headers ignored.
  more_clear_input_headers 'Cookie';
  proxy_ignore_headers     Set-Cookie Expires Cache-Control X-Accel-Expires X-Accel-Redirect;
  proxy_cache              cache_disk;

  proxy_pass http://backend;
}

proxy.conf内容

# proxy.conf -- proxy settings shared by the locations in front.conf.

# --- Request sent to the upstream ---
# HTTP/1.1 with an empty Connection header: both are required for upstream
# keepalive.
proxy_http_version 1.1;
proxy_set_header Connection "";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_redirect off;

# --- Timeouts ---
proxy_connect_timeout 60;
proxy_send_timeout 180;
proxy_read_timeout 180;

# --- Buffering ---
proxy_buffering on;
proxy_buffer_size 4k;
proxy_buffers 8 4k;
proxy_busy_buffers_size 8k;
proxy_temp_file_write_size 128k;

# --- Caching ---
proxy_cache_key $host$request_uri;
# Collapse concurrent misses for one key into a single upstream fetch to
# reduce upstream load.
proxy_cache_lock on;
proxy_cache_lock_timeout 2;
# Lifetime matches the proxy_cache_path setting. The nginx cache is simple
# and not very smart about revalidation, but the hit rate is satisfactory.
proxy_cache_valid 200 304 1d;
proxy_cache_valid any 1m;
proxy_cache_use_stale error timeout invalid_header updating http_500 http_502 http_503 http_504;
# Keep fetching the complete file from the upstream even if the client
# disconnects, so that the next request is served from the cache.
# The default is off; adjust as needed.
proxy_ignore_client_abort on;

upstream.conf内容

# Upstream group "backend", the target of every proxy_pass in front.conf.
# The address is presumably a placeholder for the real origin server.
upstream        backend {
  server        12.34.56.78:90;
  # Keepalive connections to the upstream; proxy.conf sets HTTP/1.1 and an
  # empty Connection header, which this needs.
  keepalive     128;
}

如果做本机代理,还可以这么写

# Alternative "backend" for a same-machine setup: the socket path matches the
# "listen" of the site server in site/yourdomain.conf.
upstream        backend {
  server        unix:/tmp/nginx-local.sock;
}

网站配置

很普通的设置,仅为保持文档完整

site/yourdomain.conf

# Origin site served on the same machine, behind the front-end proxy.
server                  {
  # With everything on one machine, a unix socket avoids TCP overhead.
  listen                unix:/tmp/nginx-local.sock;
  server_name           .yourdomain.com;
  index                 index.php;
  root                  /path-to-your-site;
  gzip                  on;
  access_log            /var/log/nginx/yourdomain-access.log main;
  error_log             /var/log/nginx/yourdomain-error.log;

  # DiscuzX pseudo-static URLs mapped back to the PHP entry points.
  location / {
    rewrite             ^([^\.]*)/topic-(.+)\.html$ $1/portal.php?mod=topic&topic=$2 last;
    rewrite             ^([^\.]*)/article-([0-9]+)-([0-9]+)\.html$ $1/portal.php?mod=view&aid=$2&page=$3 last;
    rewrite             ^([^\.]*)/forum-(\w+)-([0-9]+)\.html$ $1/forum.php?mod=forumdisplay&fid=$2&page=$3 last;
    rewrite             ^([^\.]*)/thread-([0-9]+)-([0-9]+)-([0-9]+)\.html$ $1/forum.php?mod=viewthread&tid=$2&extra=page%3D$4&page=$3 last;
    rewrite             ^([^\.]*)/group-([0-9]+)-([0-9]+)\.html$ $1/forum.php?mod=group&fid=$2&page=$3 last;
    rewrite             ^([^\.]*)/space-(username|uid)-(.+)\.html$ $1/home.php?mod=space&$2=$3 last;
    rewrite             ^([^\.]*)/blog-([0-9]+)-([0-9]+)\.html$ $1/home.php?mod=space&uid=$2&do=blog&id=$3 last;
    rewrite             ^([^\.]*)/(fid|tid)-([0-9]+)\.html$ $1/index.php?action=$2&value=$3 last;
    rewrite             ^([^\.]*)/([a-z]+[a-z0-9_]*)-([a-z0-9_\-]+)\.html$ $1/plugin.php?id=$2:$3 last;
  }

  # Security principle: what is writable must not be executable, and what is
  # executable must not be writable.
  location              ~ /(data|config|template)/.*\.php$ {
    return              403;
  }

  location              ~ .*\.php$ {
    # Only hand existing files to PHP. Without this check a URI such as
    # /upload/picture.jpg/x.php reaches php-fpm, which may then execute the
    # uploaded file, depending on its path-info settings.
    try_files           $uri =404;
    # php-fpm is reached through a unix socket as well.
    include             fastcgi_params;
    fastcgi_param       SCRIPT_FILENAME $document_root$fastcgi_script_name;
    fastcgi_pass        unix:/tmp/php-fpm-yourdomain.sock;
    fastcgi_index       index.php;
    expires             -1;
  }
}

滚动日志

以星期为周期

#!/bin/sh
# Weekly log rotation for nginx.
# The whole log directory is renamed to <logdir>.<weekday>, replacing the copy
# kept from the same weekday of the previous week; a fresh directory is then
# created and nginx is reloaded so that it opens new log files.
today=$(date +%a)
logdir='/var/log/nginx'

# Never build paths from empty values: report the problem and fail.
if [ -z "${today}" ] || [ -z "${logdir}" ]; then
  echo "nginx log rotation: empty weekday or log directory" >&2
  exit 1
fi

archive="${logdir}.${today}"

if [ -d "${logdir}" ]; then
  # All expansions are quoted so that unusual characters cannot split paths.
  rm -rf -- "${archive}"
  if ! mv -- "${logdir}" "${archive}"; then
    # The live directory is still in place and nginx keeps writing to it;
    # stop here instead of reloading onto a half-rotated state.
    echo "nginx log rotation: cannot move ${logdir} to ${archive}" >&2
    exit 1
  fi
fi

# Also covers the case where the log directory did not exist at all.
mkdir -p -- "${logdir}" || exit 1
service nginx reload

exit

结语

在这种模式下,php负载和灌水机的垃圾帖大大减少,缓存命中率也在80%以上。

本文权当抛砖引玉,如果有不合适的地方敬请指出。

同时欢迎到论坛讨论。

/data/vhosts/wiki-data/pages/nginx_强过防火墙.txt · 最后更改: 2014/06/05 22:57 由 harbinbeer