Master Apache & Nginx Log Analysis with Powerful Bash Commands
This guide compiles essential Bash one‑liners for Apache and Nginx log files, showing how to list top IPs, count unique visitors, tally page hits, filter bots, and generate time‑based request statistics for effective web‑service monitoring.
Apache Log Statistics
# The commands below assume the Apache common/combined log format, where the
# whitespace-separated fields are: $1 client IP, $4 "[timestamp", $7 request
# path and (combined format only) $12 the start of the quoted user agent.
# Adjust the field numbers if the server uses a custom LogFormat.

# List the 20 IPs with the most accesses today.
# uniq -c only merges adjacent duplicate lines, so the IPs must be sorted first.
awk '{print $1}' access_log | sort | uniq -c | sort -rn | head -n 20
# Count how many unique IPs accessed today
awk '{print $1}' access_log | sort -u | wc -l
# Count total accesses of a specific page (e.g., index.php).
# -F treats the pattern as a literal string, so the dot is not a wildcard.
grep -cF 'index.php' access_log
# Show how many pages each IP accessed
awk '{++S[$1]} END {for (a in S) print a, S[a]}' access_log
# Sort IPs by the number of pages they accessed (ascending)
awk '{++S[$1]} END {for (a in S) print S[a], a}' access_log | sort -n
# List pages visited by a particular IP (example 192.168.1.2).
# An exact string comparison avoids also matching 192.168.1.20, 192.168.1.200, ...
awk '$1 == "192.168.1.2" {print $1, $7}' access_log
# Count unique visitor IPs whose user agent starts with "Mozilla" (a rough way
# to exclude search-engine bots; crawlers that send a Mozilla-style user agent
# are still counted). $12 keeps the opening double quote of the user-agent
# string, so the pattern has to include it.
awk '$12 ~ /^"Mozilla/ {print $1}' access_log | sort -u | wc -l
# Count unique IPs that accessed within a specific hour (example 21/Nov/2019:03).
# Matching only the "dd/Mon/yyyy:HH:" prefix covers the whole hour instead of a single second.
awk 'index($4, "21/Nov/2019:03:") {print $1}' access_log | sort -u | wc -l
Nginx Log Statistics
# The commands below assume the default Nginx "combined" log format, where the
# whitespace-separated fields are: $1 client IP, $4 "[dd/Mon/yyyy:HH:MM:SS"
# and $7 the request path. Adjust the field numbers for a custom log_format.

# List all unique IPs in numeric order.
# Plain "sort -n" only compares the leading "a.b" part of a dotted address,
# so every octet is given as its own numeric key.
awk '{print $1}' access_log | sort -u | sort -t. -k1,1n -k2,2n -k3,3n -k4,4n
# Show the top 100 most frequent IPs
awk '{print $1}' access_log | sort | uniq -c | sort -rn | head -n 100
# Show IPs with more than 100 accesses
awk '{print $1}' access_log | sort | uniq -c | awk '$1 > 100' | sort -rn
# Detailed access list for a specific IP, sorted by frequency (example 192.168.1.2).
# An exact comparison on $1 avoids matching 192.168.1.20 or the same text in a referer.
awk '$1 == "192.168.1.2" {print $7}' access_log | sort | uniq -c | sort -rn | head -n 100
# Top 100 most requested pages
awk '{print $7}' access_log | sort | uniq -c | sort -rn | head -n 100
# Top 100 pages excluding php and py files.
# The test is applied to the request path only, with the dot escaped, so that
# paths such as /happy or a referer containing "php" are not dropped by mistake.
awk '$7 !~ /\.(php|py)([?#]|$)/ {print $7}' access_log | sort | uniq -c | sort -rn | head -n 100
# Pages with more than 100 hits
awk '{print $7}' access_log | sort | uniq -c | awk '$1 > 100'
# Most requested pages in the last 1000 lines
tail -n 1000 access_log | awk '{print $7}' | sort | uniq -c | sort -rn
# Top 100 timestamps by requests per second.
# Characters 14-21 of $4 are the HH:MM:SS part of the timestamp.
awk '{print substr($4, 14, 8)}' access_log | sort | uniq -c | sort -rn | head -n 100
# Top 100 timestamps by requests per minute (HH:MM)
awk '{print substr($4, 14, 5)}' access_log | sort | uniq -c | sort -rn | head -n 100
# Top 100 timestamps by requests per hour (HH)
awk '{print substr($4, 14, 2)}' access_log | sort | uniq -c | sort -rn | head -n 100
Web Service Status Statistics
# The commands below assume the common/combined access-log format, where the
# whitespace-separated fields are: $1 client IP, $7 request path, $9 HTTP
# status and $10 response size in bytes. Adjust for a custom log format.

# List crawlers (Googlebot, Baiduspider)
grep -E 'Googlebot|Baiduspider' access_log | awk '{print $1}' | sort -u
# Count browser types (excluding common browsers).
# Splitting on double quotes makes $6 the user-agent string in the combined
# format; counting whole log lines would make almost every entry unique.
awk -F'"' '{print $6}' access_log | grep -v -E 'MSIE|Firefox|Chrome|Opera|Safari|Gecko|Maxthon' | sort | uniq -c | sort -rn | head -n 100
# Show IP subnet distribution (first three octets)
awk '{split($1, o, "."); print o[1]"."o[2]"."o[3]".0"}' access_log | sort | uniq -c | sort -rn | head -n 200
# List referring domains.
# NOTE(review): this relies on a custom log format whose second field is the
# domain; in the stock common/combined formats $2 is the identd field ("-").
# Confirm against the server's log format before using.
awk '{print $2}' access_log | sort | uniq -c | sort -rn
# Count HTTP status codes
awk '{print $9}' access_log | sort | uniq -c | sort -rn
# Count URL access frequencies
awk '{print $7}' access_log | sort | uniq -c | sort -rn
# Count URL traffic for URLs that include a query string ("?" or "&")
awk '$7 ~ /[?&]/ {print $7}' access_log | sort | uniq -c | sort -rn
# File traffic statistics (sum of bytes per URL, 200 responses only).
# The status filter has to run before aggregation: once the bytes are summed,
# the output lines are just "<bytes> <url>" and no longer carry a status code.
awk '$9 == 200 {sum[$7] += $10} END {for (i in sum) print sum[i], i}' access_log | sort -rn
Combined Statistics
# (Commands duplicated from previous sections for convenience)
# Field numbers assume the common/combined access-log format: $1 client IP,
# $4 "[dd/Mon/yyyy:HH:MM:SS", $7 request path, $12 start of the quoted user agent.

# Top 20 IPs by accesses (input is sorted first because uniq -c only merges adjacent lines)
awk '{print $1}' access_log | sort | uniq -c | sort -rn | head -n 20
# Number of unique IPs
awk '{print $1}' access_log | sort -u | wc -l
# Total accesses of index.php (-F: literal match, the dot is not a wildcard)
grep -cF 'index.php' access_log
# Pages per IP
awk '{++S[$1]} END {for (a in S) print a,S[a]}' access_log
# IPs sorted by page count (ascending)
awk '{++S[$1]} END {for (a in S) print S[a],a}' access_log | sort -n
# Pages visited by 192.168.1.2 (exact match, so 192.168.1.20 etc. are not included)
awk '$1 == "192.168.1.2" {print $1, $7}' access_log
# Unique IPs whose user agent starts with "Mozilla" ($12 keeps its opening double quote)
awk '$12 ~ /^"Mozilla/ {print $1}' access_log | sort -u | wc -l
# Unique IPs within the hour 21/Nov/2019:03 (prefix match covers the whole hour)
awk 'index($4, "21/Nov/2019:03:") {print $1}' access_log | sort -u | wc -l
# (Nginx commands repeated as needed)
# Unique IPs in numeric order (one numeric sort key per octet)
awk '{print $1}' access_log | sort -u | sort -t. -k1,1n -k2,2n -k3,3n -k4,4n
# Top 100 most frequent IPs
awk '{print $1}' access_log | sort | uniq -c | sort -rn | head -n 100
# IPs with more than 100 accesses
awk '{print $1}' access_log | sort | uniq -c | awk '$1 > 100' | sort -rn
# Pages requested by 192.168.1.2, most frequent first
awk '$1 == "192.168.1.2" {print $7}' access_log | sort | uniq -c | sort -rn | head -n 100
# Top 100 most requested pages
awk '{print $7}' access_log | sort | uniq -c | sort -rn | head -n 100
# Top 100 pages excluding php and py files (tested on the request path only, dot escaped)
awk '$7 !~ /\.(php|py)([?#]|$)/ {print $7}' access_log | sort | uniq -c | sort -rn | head -n 100
# Pages with more than 100 hits
awk '{print $7}' access_log | sort | uniq -c | awk '$1 > 100'
# Most requested pages in the last 1000 lines
tail -n 1000 access_log | awk '{print $7}' | sort | uniq -c | sort -rn
# Top 100 timestamps by requests per second (HH:MM:SS), per minute (HH:MM) and per hour (HH)
awk '{print substr($4, 14, 8)}' access_log | sort | uniq -c | sort -rn | head -n 100
awk '{print substr($4, 14, 5)}' access_log | sort | uniq -c | sort -rn | head -n 100
awk '{print substr($4, 14, 2)}' access_log | sort | uniq -c | sort -rn | head -n 100
Count Statistics
# Field numbers below assume the common/combined access-log format: $1 client
# IP, $4 "[dd/Mon/yyyy:HH:MM:SS", $7 request path, $9 HTTP status, $10 bytes,
# $12 start of the quoted user agent. Adjust for a custom log format.

# Count accesses of a specific page (-F: literal match, the dot is not a wildcard)
grep -cF '/index.php' log_file
# Pages per IP
awk '{++S[$1]} END {for (a in S) print a,S[a]}' log_file
# Sort IPs by page count
awk '{++S[$1]} END {for (a in S) print S[a],a}' log_file | sort -n
# Pages visited by a specific IP (example 111.111.111.111).
# An exact comparison on $1 avoids treating the dots as regex wildcards.
awk '$1 == "111.111.111.111" {print $1, $7}' log_file
# Exclude search engine bots from daily page count: count unique IPs whose
# user agent starts with "Mozilla". $12 keeps the opening double quote of the
# user-agent string, so the pattern has to include it. Crawlers that send a
# Mozilla-style user agent are still counted.
awk '$12 ~ /^"Mozilla/ {print $1}' log_file | sort -u | wc -l
# Count IPs in a specific hour (example 21/Jun/2018:14)
awk 'index($4, "21/Jun/2018:14:") {print $1}' log_file | sort -u | wc -l
# List crawlers
grep -E 'Googlebot|Baiduspider' /www/logs/access.2019-02-23.log | awk '{print $1}' | sort -u
# Count browsers (excluding common browsers).
# Splitting on double quotes makes $6 the user-agent string in the combined
# format; counting whole log lines would make almost every entry unique.
awk -F'"' '{print $6}' /www/logs/access.2019-02-23.log | grep -v -E 'MSIE|Firefox|Chrome|Opera|Safari|Gecko|Maxthon' | sort | uniq -c | sort -rn | head -n 100
# IP statistics (top 10 IPs).
# The date filter is written to match the day in the log file's name
# (2019-02-23); presumably that file holds that day's entries - adjust if not.
awk 'index($4, "23/Feb/2019") {print $1}' /www/logs/access.2019-02-23.log | sort | uniq -c | sort -rn | head -n 10
# Subnet statistics (top 200 subnets)
awk '{split($1, o, "."); print o[1]"."o[2]"."o[3]".0"}' /www/logs/access.2019-02-23.log | sort | uniq -c | sort -rn | head -n 200
# Domain statistics.
# NOTE(review): this relies on a custom log format whose second field is the
# domain; in the stock common/combined formats $2 is the identd field ("-").
awk '{print $2}' /www/logs/access.2019-02-23.log | sort | uniq -c | sort -rn
# HTTP status codes
awk '{print $9}' /www/logs/access.2019-02-23.log | sort | uniq -c | sort -rn
# URL access count
awk '{print $7}' /www/logs/access.2019-02-23.log | sort | uniq -c | sort -rn
# URL traffic for URLs that include a query string ("?" or "&")
awk '$7 ~ /[?&]/ {print $7}' /www/logs/access.2019-02-23.log | sort | uniq -c | sort -rn
# File traffic (bytes per URL, 200 responses only).
# The status filter has to run before aggregation: once the bytes are summed,
# the output lines are just "<bytes> <url>" and no longer carry a status code.
awk '$9 == 200 {sum[$7] += $10} END {for (i in sum) print sum[i], i}' /www/logs/access.2019-02-23.log | sort -rn
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
MaGe Linux Operations
Founded in 2009, MaGe Education is a top Chinese high‑end IT training brand. Its graduates earn 12K+ RMB salaries, and the school has trained tens of thousands of students. It offers high‑pay courses in Linux cloud operations, Python full‑stack, automation, data analysis, AI, and Go high‑concurrency architecture. Thanks to quality courses and a solid reputation, it has talent partnerships with numerous internet firms.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
