2024-05-16 09:24:41 +00:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2024-05-16 10:35:08 +00:00
|
|
|
require 'date'
|
2024-05-17 11:08:56 +00:00
|
|
|
require_relative 'utils/http_codes'
|
2024-05-17 11:56:03 +00:00
|
|
|
require_relative 'utils/date_for_humans'
|
2024-05-16 10:35:08 +00:00
|
|
|
|
2024-05-19 16:25:39 +00:00
|
|
|
# Work out which log file to analyse and load it into memory.
#
#   -p PATH / --parse PATH   analyse PATH
#   (anything else)          fall back to ./access.log
#
# Leaves `args`, `data` (raw file contents) and `lines` (one entry per log
# line) defined for the rest of the script. Exits with status 1 when the
# file cannot be found or the path is missing.
args = ARGV

if %w[--parse -p].include?(args[0])
  log_path = args[1]

  # Without this guard a bare `-p` reaches File with a nil path and dies with
  # a TypeError that the ENOENT rescue below does not cover.
  if log_path.nil?
    warn '[x] No log file path given after -p/--parse.'
    exit 1
  end

  puts("[!] Opening log file at #{log_path}")
  missing_message = "[x] The file #{log_path} was not found."
else
  log_path = 'access.log'
  puts('[!] Log file path not provided, assuming default access.log')
  missing_message = '[x] Default file access.log not found. Specify an alternative log file to parse with -p or --parse.'
end

begin
  # File.read opens, reads and closes the file in a single call, so no file
  # handle is left open for the remainder of the run.
  data = File.read(log_path)
rescue Errno::ENOENT
  warn missing_message
  exit 1
end

# Access logs can contain arbitrary request bytes; scrub replaces invalid
# sequences so the regex matching further down does not raise. Splitting on
# an optional "\r" also accepts CRLF line endings.
lines = data.scrub.split(/\r?\n/)
|
|
|
|
|
2024-05-19 16:25:39 +00:00
|
|
|
|
2024-05-16 09:24:41 +00:00
|
|
|
# Extracts the first dotted-quad (IPv4-looking) token from a log line.
#
# Note: the pattern is not tied to a particular field, so it can pick up
# something else that looks like an address, such as a dotted version number
# inside the User-Agent.
#
# @param line [String] a single log line
# @return [String, nil] the first match, or nil when the line has none
def get_line_ip(line)
  line[/\b(?:\d{1,3}\.){3}\d{1,3}\b/]
end
|
|
|
|
|
2024-05-16 10:35:08 +00:00
|
|
|
# Returns the month and year of the first bracketed timestamp in a log line.
#
#   [12/Apr/2023:13:56:41 +0100] -> 12/Apr/2023:13:56:41 +0100 -> Apr/2023
#
# The common-log layout is parsed with an explicit format. Date.parse guesses
# the layout and is not reliable for this colon-joined form (the
# "2023:13:56" run can be taken for a time of day, which loses the year).
# Timestamps in any other layout still go through Date.parse as before.
#
# @param line [String] a single log line
# @return [String] the date formatted as '%b/%Y', e.g. 'Apr/2023'
# @raise [StandardError] when the line has no bracketed section
# @raise [ArgumentError] when the bracketed text cannot be read as a date
def get_line_date(line)
  stamp = line[/\[(.*?)\]/, 1]
  raise StandardError, 'Invalid datetime data detected!' unless stamp

  parsed =
    begin
      Date.strptime(stamp, '%d/%b/%Y:%H:%M:%S %z')
    rescue ArgumentError
      # Not in common-log layout; let Date.parse try to recognise it.
      Date.parse(stamp)
    end

  parsed.strftime('%b/%Y')
end
|
|
|
|
|
2024-05-16 15:37:02 +00:00
|
|
|
# Extracts the HTTP status code from a log line.
#
# The code is taken to be the first run of exactly three digits that has
# whitespace on both sides.
#
# @param line [String] a single log line
# @return [String, nil] the three digits, or nil when nothing matches
def get_line_code(line)
  found = /\s(\d{3})\s/.match(line)
  found && found[1]
end
|
|
|
|
|
2024-05-16 15:37:02 +00:00
|
|
|
# Extracts the user agent from a log line, taken to be the double-quoted
# field that closes the line (trailing whitespace is tolerated).
#
# Lines that do not end in a quoted field return nil, like the other
# extractors, instead of raising NoMethodError on a nil match.
#
# @param line [String] a single log line
# @return [String, nil] the field's contents without the surrounding quotes,
#   or nil when the line does not end with a quoted field
def get_line_ua(line)
  line[/"([^"]*)"\s*$/, 1]
end
|
|
|
|
|
2024-05-16 09:24:41 +00:00
|
|
|
# Counts how many entries of a list equal the given IP address.
#
# @param ips [Array] every address seen, duplicates included
# @param ip_to_check [Object] the address to look for
# @return [Integer] number of entries equal to ip_to_check
def times_appeared_single(ips, ip_to_check)
  ips.count { |candidate| candidate == ip_to_check }
end
|
|
|
|
|
2024-05-16 15:37:02 +00:00
|
|
|
# Removes duplicates from a list of IP addresses.
#
# Despite the name nothing is sorted: each address is kept at the position
# of its first occurrence.
#
# @param ips [Array] addresses, possibly repeated
# @return [Array] a new array holding each distinct address once
def sort_unique_ip(ips)
  ips.uniq
end
|
2024-05-16 10:35:08 +00:00
|
|
|
|
2024-05-16 11:47:23 +00:00
|
|
|
# Result containers filled in by the reporting steps of this script.
visit_counter = {}  # ip => total number of requests
monthly_visits = {} # ip => { date label => number of requests }
client_errors = {}  # ip => number of client-error responses
user_agents = {}    # user agent => number of requests

# Lines with no recognisable address yield nil; drop those so they are not
# reported as an extra, blank "IP".
all_ips = lines.map { |line| get_line_ip(line) }.compact

unique_ips = sort_unique_ip(all_ips)

puts("There were a total of #{unique_ips.length} unique IPs who connected to our site.")

# Count every address in a single pass rather than rescanning the whole list
# (twice) for each unique address.
hits_per_ip = Hash.new(0)
all_ips.each { |ip| hits_per_ip[ip] += 1 }

unique_ips.each do |ip|
  visit_counter[ip] = hits_per_ip[ip]
  puts("IP #{ip} contacted our site #{visit_counter[ip]} times.")
end
|
2024-05-16 15:37:02 +00:00
|
|
|
|
|
|
|
# Second pass over the log: for every line, update the client-error count of
# its IP, the request count of its user agent, and the per-IP, per-month
# visit count.
lines.each do |line|
  ip = get_line_ip(line).to_s
  code = get_line_code(line)
  ua = get_line_ua(line)

  # A line whose date cannot be extracted is still counted, under 'N/A'.
  date =
    begin
      get_line_date(line)
    rescue StandardError
      'N/A'
    end

  # A missing status code is nil, which to_i turns into 0.
  client_errors[ip] = client_errors.fetch(ip, 0) + 1 if is_client_err?(code.to_i)

  user_agents[ua] = user_agents.fetch(ua, 0) + 1

  visits_by_date = (monthly_visits[ip] ||= {})
  visits_by_date[date] = visits_by_date.fetch(date, 0) + 1
end
|
2024-05-17 11:08:56 +00:00
|
|
|
|
2024-05-19 16:06:53 +00:00
|
|
|
# Picks the five entries with the highest counts from a { key => count } hash,
# highest first.
five_busiest = ->(tally) { tally.sort_by { |_key, hits| -hits }.first(5) }

# Report the most frequent user agents.
top_user_agents = five_busiest.call(user_agents)
puts 'Top 5 User-Agents contacting the site:'
top_user_agents.each { |ua, count| puts "#{ua}: #{count} visits" }

# Report the IPs that triggered the most client errors.
top_client_errors = five_busiest.call(client_errors)
puts 'Top 5 IPs with most client errors (400-499):'
top_client_errors.each { |ip, count| puts "#{ip}: #{count} errors" }
|
2024-05-19 21:09:41 +00:00
|
|
|
|
|
|
|
# Print, for every IP, a banner followed by its visit count per date label.
monthly_visits.each_pair do |ip, visits|
  puts '------------ BY DATE ---------------'
  puts("IP #{ip} had the following visits:")
  visits.each_pair { |date, count| puts "#{date}: #{count} visits" }
end
|
2024-05-20 08:52:17 +00:00
|
|
|
|