webserver-log-analyser/main.rb

142 lines
3.2 KiB
Ruby
Raw Normal View History

2024-05-16 09:24:41 +00:00
# frozen_string_literal: true
2024-05-16 10:35:08 +00:00
require 'date'
2024-05-17 11:08:56 +00:00
require_relative 'utils/http_codes'
2024-05-17 11:56:03 +00:00
require_relative 'utils/date_for_humans'
2024-05-16 10:35:08 +00:00
# Work out which log file to analyse and load its lines.
#
# `--parse PATH` / `-p PATH` selects a specific file; without that flag the
# script falls back to `access.log` in the current working directory.
# On a missing path or missing file the script prints a message to stderr and
# exits with status 1.
args = ARGV
if args[0] == '--parse' || args[0] == '-p'
  log_path = args[1]
  # A bare `-p` would otherwise hand nil to the file API and die with a TypeError.
  if log_path.nil? || log_path.empty?
    warn '[x] No log file path given after -p/--parse.'
    exit 1
  end
  puts("[!] Opening log file at #{log_path}")
  missing_message = "[x] The file #{log_path} was not found."
else
  log_path = 'access.log'
  puts('[!] Log file path not provided, assuming default access.log')
  missing_message = '[x] Default file access.log not found. Specify an alternative log file to parse with -p or --parse.'
end

begin
  # File.read opens, reads and closes the file in one call, so no handle is left open.
  data = File.read(log_path)
rescue Errno::ENOENT
  warn missing_message
  exit 1
end
lines = data.split("\n")
2024-05-16 09:24:41 +00:00
# Gets the IP address from the string
2024-05-16 10:35:08 +00:00
# Note: sometimes the function can fetch the UA version as an IP address
2024-05-16 09:24:41 +00:00
# @param line [String] one raw log line
# @return [String, nil] the first dotted-quad sequence found anywhere in the
#   line, or nil when the line contains none
def get_line_ip(line)
  line[/\b(?:\d{1,3}\.){3}\d{1,3}\b/]
end
2024-05-19 23:52:47 +00:00
# TODO: Implement visits per month using this method
2024-05-16 10:35:08 +00:00
# Returns the date in the current line
# Extracts the date from the first [bracketed] section of a log line.
#
# @param line [String] one raw log line
# @return [Date, nil] the parsed date, or nil when the line has no bracketed
#   section at all
# @raise [Date::Error] when a bracketed section exists but cannot be read as a date
def get_line_date(line)
  # [12/Apr/2023:13:56:41 +0100] -> 12/Apr/2023:13:56:41 +0100
  stamp = line[/\[(.*?)\]/, 1]&.delete('[]')
  return nil unless stamp

  begin
    # Try the explicit access-log layout first so the result does not depend on
    # Date.parse's heuristics (the year is followed directly by ":HH").
    Date.strptime(stamp, '%d/%b/%Y:%H:%M:%S %z')
  rescue ArgumentError
    # Date::Error is an ArgumentError; any other layout goes through the
    # heuristic parser as before.
    Date.parse(stamp)
  end
end
2024-05-16 15:37:02 +00:00
# Gets the HTTP status code of the given log line
2024-05-16 10:35:08 +00:00
# @param line [String] one raw log line
# @return [String, nil] the first run of exactly three digits that has
#   whitespace on both sides (as a String, not an Integer), or nil if absent
def get_line_code(line)
  line[/\s(\d{3})\s/, 1]
end
2024-05-16 15:37:02 +00:00
# Gets the user agent of the given log line
# Reads the last double-quoted field of a log line, with the quotes removed.
#
# @param line [String] one raw log line
# @return [String, nil] the text inside the quoted field that ends the line,
#   or nil when the line does not end in a quoted field (blank or malformed
#   lines previously raised NoMethodError here)
def get_line_browser(line)
  # Capture group 1 is the text between the quotes, so no stripping is needed.
  line[/"([^"]*)"$/, 1]
end
2024-05-16 09:24:41 +00:00
# Gets the number of times an IP contacted the site
# @param ips [Array] every IP seen, duplicates included
# @param ip_to_check [Object] the IP to look for
# @return [Integer] how many entries of +ips+ equal +ip_to_check+
def times_appeared_single(ips, ip_to_check)
  ips.count(ip_to_check)
end
2024-05-16 15:37:02 +00:00
# Returns all unique IPs in a given list of IP addresses with duplicates
2024-05-16 11:47:23 +00:00
# Despite the name, nothing is sorted: entries keep the order in which they
# first appear.
#
# @param ips [Array] IPs, possibly containing duplicates
# @return [Array] a new array holding each distinct entry once
def sort_unique_ip(ips)
  ips.uniq
end
2024-05-16 10:35:08 +00:00
2024-05-16 11:47:23 +00:00
# Accumulators shared by the reporting passes below.
visit_counter = {}
client_errors = {}
browsers = {}

# First pass: collect the IP of every line. Lines with no recognisable IP
# yield nil; they are dropped so they are not reported as a "unique IP".
all_ips = lines.map { |line| get_line_ip(line) }.compact

unique_ips = sort_unique_ip(all_ips)
puts("There were a total of #{unique_ips.length} unique IPs who connected to our site.")

# Count visits in a single pass instead of rescanning the whole list per IP.
all_ips.each do |ip|
  visit_counter[ip] = (visit_counter[ip] || 0) + 1
end

unique_ips.each do |ip|
  puts("IP #{ip} contacted our site #{visit_counter[ip]} times.")
end
2024-05-16 15:37:02 +00:00
# Second pass: tally client errors per IP and requests per user agent.
lines.each do |line|
  ip = get_line_ip(line).to_s
  code = get_line_code(line)
  browser = get_line_browser(line)

  # A missing status code becomes 0 via to_i. Lines with no IP are skipped so
  # they do not show up in the error report under an empty key.
  if is_client_err?(code.to_i) && !ip.empty?
    client_errors[ip] = (client_errors[ip] || 0) + 1
  end

  # Do not tally a line that has no user agent under a nil key.
  next unless browser

  browsers[browser] = (browsers[browser] || 0) + 1
end
2024-05-17 11:08:56 +00:00
# Report the five most frequent user agents, highest count first.
top_browsers = browsers.sort_by { |_browser, visits| -visits }.take(5)
puts 'Top 5 browsers contacting the site:'
top_browsers.each { |browser, visits| puts "#{browser}: #{visits} visits" }

# Report the five IPs with the most client-error responses, highest count first.
top_client_errors = client_errors.sort_by { |_address, errors| -errors }.take(5)
puts 'Top 5 IPs with most client errors (400-499):'
top_client_errors.each { |address, errors| puts "#{address}: #{errors} errors" }