Stuffing webserver access logs, as JSON, into MongoDB

Andrew Lewman fb9e28908d rough script in julia to find GET request counts by hostname 1 month ago
README.md 80651085ec insert a sample JSON document as a reference 2 months ago
all_host_counts.jl fb9e28908d rough script in julia to find GET request counts by hostname 1 month ago
connect.rb 410831cf4c need to allow disk use for the big aggregate queries 2 months ago

README.md

Experiments on Ruby, MongoDB, JSON, and Caddy

For fun, take the Caddy webserver's native JSON-format access logs, load them into MongoDB, and query them with Ruby. Because... why not?

JSON Document Structure

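Here's one request, shown as stored in MongoDB (the `ObjectId(...)` value of `_id` is mongo shell notation, not strict JSON):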

{
        "_id" : ObjectId("6015e0b9c20ce7e9eefcb4d2"),
        "level" : "info",
        "ts" : 1612046520.928371,
        "logger" : "http.log.access.log1",
        "msg" : "handled request",
        "request" : {
                "remote_addr" : "[2a01:4f8:a0:24dd::2]:48464",
                "proto" : "HTTP/1.1",
                "method" : "GET",
                "host" : "code.lewman.com",
                "uri" : "/andrew/ipvtech-crawler/src/37d0280daf8eb9ef25ed82b0c08f39e0ca14a962?lang=pt-BR",
                "headers" : {
                        "Accept" : [
                                "text/html,text/plain,text/xml,text/*,application/xml,application/xhtml+xml,application/rss+xml,application/atom+xml,application/rdf+xml,application/php,application/x-php,application/x-httpd-php"
                        ],
                        "User-Agent" : [
                                "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)"
                        ],
                        "Accept-Encoding" : [
                                "br,gzip"
                        ],
                        "Accept-Language" : [
                                "en"
                        ]
                },
                "tls" : {
                        "resumed" : false,
                        "version" : 771,
                        "cipher_suite" : 49196,
                        "proto" : "",
                        "proto_mutual" : true,
                        "server_name" : "code.lewman.com"
                }       
        },              
        "common_log" : "2a01:4f8:a0:24dd::2 - - [30/Jan/2021:22:42:00 +0000] \"GET /andrew/ipvtech-crawler/src/37d0280daf8eb9ef25ed82b0c08f39e0ca14a962?lang=pt-BR HTTP/1.1\" 302 91",
        "duration" : 0.000721969,
        "size" : 91,    
        "status" : 302,         
        "resp_headers" : {
                "Content-Length" : [
                        "91"
                ],      
                "X-Frame-Options" : [
                        "DENY"
                ],      
                "Strict-Transport-Security" : [
                        "max-age=31536000;"
                ],
                "X-Content-Type-Options" : [
                        "nosniff"
                ],
                "Content-Type" : [
                        "text/html; charset=utf-8"
                ],
                "Set-Cookie" : [
                        "lang=pt-BR; Path=/; Max-Age=2147483647"
                ],
                "Date" : [
                        "Sat, 30 Jan 2021 22:42:00 GMT"
                ],
                "Server" : [
                        "Caddy"
                ],
                "X-Xss-Protection" : [
                        "1; mode=block"
                ],
                "Cache-Control" : [
                        "max-age=31536000"
                ],
                "Location" : [
                        "/andrew/ipvtech-crawler/src/37d0280daf8eb9ef25ed82b0c08f39e0ca14a962"
                ]
        }
}