ルーターの状態を監視し異常を通知する（FWX120を利用）

ルーターの状態を監視し異常を通知する

定期的にルーターの状態を監視し、異常を検出した場合にメールで通知するLuaスクリプトです。
以下の状態毎に閾値を指定し、閾値を超えた場合に異常が発生したと見なしてメールを送ります。

CPU使用率
メモリ使用率
筐体内温度(※)
PPインタフェース回線使用率（送信負荷、受信負荷）
IPマスカレードで使用中のポート数

(※) 筐体内温度の計測に対応した機種を使用する場合だけ監視を行います。

設定例はこちら

Luaスクリプト例はこちら

FWX120の設定例

下記の設定（Config）を取り出すことができます。

LANのインタフェースの設定（LAN1ポートを使用）	ip lan1 address 192.168.0.1/24
WANのインタフェースの設定（LAN2ポートを使用）
pp select 1
 pp always-on on
 pppoe use lan2
 pp auth accept pap chap
 pp auth myname (ISPに接続するID) (ISPに接続するパスワード)
 ppp lcp mru on 1454
 ppp ipcp ipaddress on
 ppp ipcp msext on
 ip pp mtu 1454
 ip pp nat descriptor 1
 pp enable 1
ip route default gateway pp 1
NATの設定	nat descriptor type 1 masquerade
DHCPの設定
dhcp service server
dhcp scope 1 192.168.0.2-192.168.0.100/24
DNSの設定
dns server (ISPより指定されたDNSサーバーのIPアドレス)
dns private address spoof on
フィルタの設定
ip inbound filter 1001 reject-nolog * * tcp,udp * 135
ip inbound filter 1002 reject-nolog * * tcp,udp 135 *
ip inbound filter 1003 reject-nolog * * tcp,udp * netbios_ns-netbios_ssn
ip inbound filter 1004 reject-nolog * * tcp,udp netbios_ns-netbios_ssn *
ip inbound filter 1005 reject-nolog * * tcp,udp * 445
ip inbound filter 1006 reject-nolog * * tcp,udp 445 *
ip inbound filter 1007 reject-nolog 192.168.0.0/24 * * * *
ip inbound filter 1099 pass-nolog * * * * *
ip policy interface group 101 name=Private local lan1
ip policy address group 101 name=Private 192.168.0.0/24
ip policy address group 102 name=Any *
ip policy service group 101 name="Open Services"
ip policy service group 102 name=General dns
ip policy service group 103 name=Mail pop3 smtp
ip policy filter 1100 reject-nolog lan1 * * * *
ip policy filter 1110 pass-nolog * * * * 102
ip policy filter 1122 static-pass-nolog * lan1 * * *
ip policy filter 1123 static-pass-nolog * local * * *
ip policy filter 1124 static-pass-log * * 192.168.0.0/24 * http
ip policy filter 1150 pass-nolog * pp1 * * *
ip policy filter 1500 reject-nolog pp* * * * *
ip policy filter 1520 pass-log * lan1 * * 101
ip policy filter 1700 pass-nolog local * * * *
ip policy filter 1710 static-pass-nolog * lan1 * * *
ip policy filter 3000 reject-nolog * * * * *
ip policy filter set 101 name="Internet Access" 1100 [1110 1123 [1124] 1122 1150] 1500 [1520] 1700 [1710] 3000
ip policy filter set enable 101
pp select 1
 ip pp inbound filter list 1001 1002 1003 1004 1005 1006 1007 1099
Luaスクリプトのスケジュール設定	schedule at 1 startup * lua (Luaスクリプトファイル名)

Luaスクリプト例

下記のLuaスクリプトを取り出すことができます。

-- Settings.
-- Every parenthesised value below is a placeholder from the template; the
-- functions and main routine of this script read these names as globals.

-- Monitoring interval (1-864000 seconds)
idle_time = (監視間隔)

-- Unit time over which CPU usage is monitored ("5sec", "1min" or "5min")
cpu_time = "(単位時間)"

-- Peer (PP) number to monitor (1 - 30)
peer_num = (相手先情報番号)

-- NAT descriptor number of the IP masquerade whose usage is monitored
-- (1 - 2147483647)
nat_descriptor = (NATディスクリプタ番号)

-- Threshold for each monitored state
th_tbl = {
  cpu = (CPU使用率（%）),
  mem = (メモリ使用率（%）),
  tmp = (筐体内温度（℃）),  -- RTX1200 only
  snd = (PPインタフェース送信負荷率（%）),
  rcv = (PPインタフェース受信負荷率（%）),
  nat = (IPマスカレード使用ポート数（個）)
}

-- Number of consecutive threshold violations after which the state is judged
-- abnormal, and likewise the number of consecutive normal readings after
-- which it is judged recovered (1, 2 ..)
count = (回数)

-- Whether to also send mail when a state returns to normal
-- (send: true / do not send: false)
down_mail = (true / false)

-- Mail settings
mail_tbl = {
  smtp_address = "(SMTPサーバーのアドレス)",
  from = "(送信元メールアドレス)",
  to = "(宛先メールアドレス)"
}

-- SYSLOG level used when sending the mail fails (info, debug, notice)
log_level = "(SYSLOGレベル)"
-- Build the search pattern used to pull the CPU usage figure for one unit
-- time out of the command output.
--
-- key: unit-time label; only "5sec", "1min" and "5min" are accepted.
-- Returns the Lua pattern string (it captures the digits that precede
-- "%(<key>)"), or nil when key is not one of the accepted labels.
function set_cpu_ptn(key)
  local accepted = { ["5sec"] = true, ["1min"] = true, ["5min"] = true }
  if not accepted[key] then
    return nil
  end
  return "(%d+)%%%(" .. key .. "%)"
end
-- Read the router's hardware resource usage.
--
-- Runs "show environment" through rt.command and, for every entry of t,
-- matches the entry's `ptn` against the output and stores the captured value
-- (converted with tonumber) in the entry's `val`; `val` becomes nil when the
-- pattern does not match.
--
-- t: table of entries, each carrying a `ptn` pattern string.
-- Returns: the status flag from rt.command, then either the command output
-- or, when the command failed or produced no output, a failure message.
-- In that failure case the entries of t are left untouched.
function rt_res_status(t)
  local cmd = "show environment"
  local ok, output = rt.command(cmd)

  if not (ok and output) then
    return ok, cmd .. "コマンド実行失敗\r\n\r\n"
  end

  for _, entry in pairs(t) do
    local captured = output:match(entry.ptn)
    if captured then
      captured = tonumber(captured)
    end
    entry.val = captured
  end

  return ok, output
end
-- Obtain the line load ratios of a PP interface.
--
-- Runs "show status pp <num>" through rt.command and collects every integer
-- part captured by the "負荷: <n>.<m>%" pattern, in order of appearance.
-- The first match is treated as the receive load and the second as the send
-- load.
--
-- num: peer (PP) number.
-- Returns: rtn, rcv, snd, str
--   rtn - status flag from rt.command
--   rcv - receive load (number) or nil when not found
--   snd - send load (number) or nil when not found
--   str - command output, or a failure message when the command failed or
--         produced no output (same convention as rt_res_status and
--         natmsq_use_status, so callers can always put str into a mail body)
function pp_load_info(num)
  local cmd = "show status pp " .. tostring(num)
  local ptn = "負荷%:%s+(%d+)%.%d+%%"
  local rcv, snd

  local rtn, str = rt.command(cmd)
  if (rtn) and (str) then
    local loads = {}
    for w in string.gmatch(str, ptn) do
      loads[#loads + 1] = tonumber(w)
    end
    rcv, snd = loads[1], loads[2]
  else
    -- Never hand nil back as the message: the caller concatenates it.
    str = cmd .. "コマンド実行失敗\r\n"
  end

  return rtn, rcv, snd, str
end
-- Report how many IP masquerade ports are in use.
--
-- Runs "show nat descriptor address <id>" through rt.command and extracts the
-- number captured by the "(%d+)個使用中" pattern from its output.
--
-- id: NAT descriptor number.
-- Returns: the status flag from rt.command, the port count (number, or nil
-- when the pattern is not found or the command failed), and either the
-- command output or a failure message.
function natmsq_use_status(id)
  local cmd = "show nat descriptor address " .. tostring(id)
  local ok, output = rt.command(cmd)

  if not (ok and output) then
    return ok, nil, cmd .. "コマンド実行失敗\r\n"
  end

  local used = output:match("(%d+)個使用中")
  if used then
    used = tonumber(used)
  end
  return ok, used, output
end
-- Produce the mail text for one monitored state when its value has just been
-- judged abnormal, or has just been judged recovered.
--
-- t:    state table for the metric (passed on to count_proc; `title` and
--       `unit` are used to build the message).
-- val:  current reading; when nil/false nothing is evaluated.
-- th:   threshold for this metric.
-- down: when true, a message is also produced on recovery.
--
-- Returns "" when there is nothing to report. Every non-empty message ends
-- with a blank line so that several messages can be concatenated into one
-- mail body without running together.
function make_msg(t, val, th, down)
  if not val then
    return ""
  end

  -- count_proc returns >0 on a new violation, <0 on a recovery, 0 otherwise.
  local rtn = count_proc(t, val, th)

  if rtn > 0 then
    return t.title .. "が閾値を超えました。\r\n"
      .. string.format(" %s: %d%s\r\n 閾値: %d%s\r\n\r\n",
           t.title, val, t.unit, th, t.unit)
  end

  if rtn < 0 and down then
    -- Terminated like the violation message above; without the line break
    -- this text was glued to whatever message followed it in the mail.
    return t.title .. "が閾値以下の値に下がりました。\r\n\r\n"
  end

  return ""
end
-- Track how many consecutive readings were above (or not above) the
-- threshold and report state changes.
--
-- t:   state table with `over` (consecutive violations), `down` (consecutive
--      normal readings while flagged) and `flag` (true while judged abnormal).
-- val: current reading.
-- th:  threshold; a reading counts as a violation only when val > th.
--
-- Uses the global `count` as the number of consecutive readings required.
-- Returns 1 when the state has just become abnormal, -1 when it has just
-- recovered, and 0 otherwise.
function count_proc(t, val, th)
  local exceeded = val > th

  if exceeded and t.flag then
    -- Already abnormal and still above the threshold: a recovery streak in
    -- progress is discarded.
    if t.down > 0 then
      t.down = 0
    end
    return 0
  end

  if exceeded then
    -- Not yet flagged: extend the violation streak.
    t.over = t.over + 1
    if t.over ~= count then
      return 0
    end
    t.flag = true
    return 1
  end

  if not t.flag then
    -- Normal reading while not flagged: a violation streak is discarded.
    if t.over > 0 then
      t.over = 0
    end
    return 0
  end

  -- Flagged and at or below the threshold: extend the recovery streak.
  t.down = t.down + 1
  if t.down ~= count then
    return 0
  end
  t.flag = false
  t.over = 0
  t.down = 0
  return -1
end
-- Return the current date and time as "YYYY/MM/DD hh:mm:ss"
-- (fields taken from os.date("*t")).
function time_stamp()
  local now = os.date("*t")
  local date_part = string.format("%d/%02d/%02d", now.year, now.month, now.day)
  local time_part = string.format("%02d:%02d:%02d", now.hour, now.min, now.sec)
  return date_part .. " " .. time_part
end
-- Main routine.
-- Polls the router every idle_time seconds and sends one mail per cycle
-- containing every threshold violation / recovery message and every command
-- failure message collected during that cycle.

-- Hardware resource table: one entry per metric read by rt_res_status.
-- `ptn` extracts the value, `over`/`down`/`flag` are the streak state used by
-- count_proc, `title`/`unit` are used by make_msg.
-- `unit` is passed to string.format as a %s argument, so a percent sign is
-- written as a single "%".
local res_tbl = {
  cpu = {ptn = "", val = 0, over = 0, down = 0, flag = false,
         title = "CPU負荷率(" .. cpu_time .. ")", unit = "%"},
  mem = {ptn = "(%d+)%% used", val = 0, over = 0, down = 0, flag = false,
         title = "メモリ使用率", unit = "%"},
  tmp = {ptn = "筐体内温度%(℃%): (%d+)", val = 0, over = 0, down = 0, flag = false,
         title = "筐体内温度", unit = "℃"}
}

-- PP interface load state (receive / send).
local pp_tbl = {
  rcv = {over = 0, down = 0, flag = false, title = "PP 受信負荷率", unit = "%"},
  snd = {over = 0, down = 0, flag = false, title = "PP 送信負荷率", unit = "%"}
}

-- IP masquerade port usage state.
local nat_tbl = {over = 0, down = 0, flag = false,
                 title = "NAT マスカレードテーブル使用ポート数", unit = "個"}

-- rcv and snd are declared here so they do not leak into the global table.
local rtn, str, nat_use, rcv, snd

-- First alphanumeric run of the firmware revision string.
-- NOTE(review): presumably the model name - confirm against the router docs.
local rt_name = string.match(_RT_FIRM_REVISION, "(%w+)")

res_tbl.cpu.ptn = set_cpu_ptn(cpu_time)
assert(res_tbl.cpu.ptn, 'cpu_time must be "5sec", "1min" or "5min"')

while (true) do
  -- Pieces of this cycle's mail body, joined once at the end.
  local parts = {}

  -- CPU usage, memory usage (and internal temperature, RTX1200 only)
  rtn, str = rt_res_status(res_tbl)
  if (rtn) then
    parts[#parts + 1] = make_msg(res_tbl.cpu, res_tbl.cpu.val, th_tbl.cpu, down_mail)
    parts[#parts + 1] = make_msg(res_tbl.mem, res_tbl.mem.val, th_tbl.mem, down_mail)
    -- NOTE(review): temperature is only evaluated when the model string is
    -- exactly "RTX1200"; confirm whether other models that report the
    -- internal temperature should be included.
    if (rt_name == "RTX1200") then
      parts[#parts + 1] = make_msg(res_tbl.tmp, res_tbl.tmp.val, th_tbl.tmp, down_mail)
    end
  else
    -- Report the failure like the other two checks instead of dropping it.
    parts[#parts + 1] = str or "show environmentコマンド実行失敗\r\n\r\n"
  end

  -- PP interface line load
  rtn, rcv, snd, str = pp_load_info(peer_num)
  if (rtn) then
    parts[#parts + 1] = make_msg(pp_tbl.rcv, rcv, th_tbl.rcv, down_mail)
    parts[#parts + 1] = make_msg(pp_tbl.snd, snd, th_tbl.snd, down_mail)
  else
    -- Append (do not overwrite) and never store nil in the mail body.
    parts[#parts + 1] = str
      or ("show status pp " .. tostring(peer_num) .. "コマンド実行失敗\r\n")
  end

  -- IP masquerade ports in use
  rtn, nat_use, str = natmsq_use_status(nat_descriptor)
  if (rtn) then
    if (nat_use) then
      parts[#parts + 1] = make_msg(nat_tbl, nat_use, th_tbl.nat, down_mail)
    end
  else
    parts[#parts + 1] = str
      or ("show nat descriptor address " .. tostring(nat_descriptor) .. "コマンド実行失敗\r\n")
  end

  mail_tbl.text = table.concat(parts)

  if (#mail_tbl.text > 0) then
    mail_tbl.subject = string.format("resource loadwatch (%s)", time_stamp())
    rtn = rt.mail(mail_tbl)
    if (not rtn) then
      rt.syslog(log_level, "failed to send mail. (Luaスクリプトファイル名)")
    end
  end

  rt.sleep(idle_time)
end

ご相談・お問い合わせ