# frozen_string_literal: true

require 'date'
require_relative 'utils/http_codes'
require_relative 'utils/date_for_humans'

# Dotted-quad matcher. Note: it can also match a dotted version number inside
# the User-Agent string, so the first hit on a line is not guaranteed to be
# the client address.
IP_PATTERN = /\b(?:\d{1,3}\.){3}\d{1,3}\b/
# First bracketed field on the line, e.g. [12/Apr/2023:13:56:41 +0100].
DATE_PATTERN = /\[(.*?)\]/
# Three digits surrounded by whitespace (the HTTP status code).
CODE_PATTERN = /\s(\d{3})\s/
# Last double-quoted field on the line (the User-Agent).
UA_PATTERN = /"([^"]*)"$/

# Gets the first IP-looking token from the log line.
#
# @param line [String] a single log line
# @return [String, nil] the matched text, or nil when nothing matches
def get_line_ip(line)
  line[IP_PATTERN]
end

# Returns the month and year of the log line's timestamp.
# [12/Apr/2023:13:56:41 +0100] -> 12/Apr/2023:13:56:41 +0100 -> Apr/2023
#
# @param line [String] a single log line
# @return [String] the date formatted as '%b/%Y'
# @raise [StandardError] when the line has no bracketed field; Date.parse may
#   also raise if the bracketed text is not a date
def get_line_date(line)
  match = line.match(DATE_PATTERN)
  raise StandardError, 'Invalid datetime data detected!' unless match

  Date.parse(match[1].delete('[]')).strftime('%b/%Y')
end

# Gets the HTTP status code of the given log line.
#
# @param line [String] a single log line
# @return [String, nil] the three-digit code as text, or nil when absent
def get_line_code(line)
  line[CODE_PATTERN, 1]
end

# Gets the user agent of the given log line, without the surrounding quotes.
#
# @param line [String] a single log line
# @return [String, nil] the user agent, or nil when the line does not end in
#   a double-quoted field
def get_line_ua(line)
  line[UA_PATTERN, 1]
end

# Gets the number of times an IP contacted the site.
#
# @param ips [Array] every IP seen, duplicates included
# @param ip_to_check [Object] the IP to count
# @return [Integer] number of elements equal to ip_to_check
def times_appeared_single(ips, ip_to_check)
  ips.count(ip_to_check)
end

# Returns all unique IPs in a given list of IP addresses with duplicates,
# keeping the order of first appearance.
#
# @param ips [Array] IPs, possibly repeated
# @return [Array] the distinct IPs
def sort_unique_ip(ips)
  ips.uniq
end

# Reads the log at path and returns its non-blank lines. File.read closes the
# handle itself, so nothing is left open.
# Prints missing_message to stderr and exits with status 1 when the file does
# not exist.
def read_log_lines(path, missing_message)
  File.read(path).split("\n").reject { |line| line.strip.empty? }
rescue Errno::ENOENT
  warn missing_message
  exit 1
end

args = ARGV

if %w[--parse -p].include?(args[0])
  # Without this check a missing path would surface as a TypeError from File.
  if args[1].nil?
    warn "[x] No log file path given after #{args[0]}."
    exit 1
  end

  puts("[!] Opening log file at #{args[1]}")
  lines = read_log_lines(args[1], "[x] The file #{args[1]} was not found.")
else
  puts('[!] Log file path not provided, assuming default access.log')
  default_missing = '[x] Default file access.log not found. ' \
                    'Specify an alternative log file to parse with -p or --parse.'
  lines = read_log_lines('access.log', default_missing)
end

client_errors = Hash.new(0)
user_agents = Hash.new(0)
monthly_visits = Hash.new { |visits_by_ip, ip| visits_by_ip[ip] = Hash.new(0) }

# Lines without any IP-looking token yield nil; drop them so they are not
# reported as a visitor.
all_ips = lines.map { |line| get_line_ip(line) }.compact

unique_ips = sort_unique_ip(all_ips)
puts("There were a total of #{unique_ips.length} unique IPs who connected to our site.")

# Single counting pass; keys are inserted in order of first appearance, which
# is the same order unique_ips has.
visit_counter = all_ips.each_with_object(Hash.new(0)) { |ip, counts| counts[ip] += 1 }
visit_counter.each do |ip, count|
  puts("IP #{ip} contacted our site #{count} times.")
end

lines.each do |line|
  ip = get_line_ip(line).to_s
  code = get_line_code(line)
  # Same placeholder as the date fallback below for lines missing the field.
  ua = get_line_ua(line) || 'N/A'

  begin
    date = get_line_date(line)
  rescue StandardError
    date = 'N/A'
  end

  # is_client_err? is not defined in this file; presumably it comes from
  # utils/http_codes. A missing code becomes 0 via to_i.
  client_errors[ip] += 1 if is_client_err?(code.to_i)
  user_agents[ua] += 1
  monthly_visits[ip][date] += 1
end

top_user_agents = user_agents.sort_by { |_ua, count| -count }.first(5)
puts 'Top 5 User-Agents contacting the site:'
top_user_agents.each do |ua, count|
  puts "#{ua}: #{count} visits"
end

top_client_errors = client_errors.sort_by { |_ip, count| -count }.first(5)
puts 'Top 5 IPs with most client errors (400-499):'
top_client_errors.each do |ip, count|
  puts "#{ip}: #{count} errors"
end

monthly_visits.each do |ip, visits|
  puts '------------ BY DATE ---------------'
  puts("IP #{ip} had the following visits:")
  visits.each do |date, count|
    puts "#{date}: #{count} visits"
  end
end