BEN-600 - Log Processing Design ================================= Decided the easiest way to extract various values is probably going to be to import the logs into a MySQL database and go from there CREATE TABLE logentries ( id INT NOT NULL AUTO_INCREMENT, cache VARCHAR(20) NOT NULL, test VARCHAR(10) NOT NULL, dateepoch INT NOT NULL, reqstring VARCHAR(255) NOT NULL, reqextension VARCHAR(10) NOT NULL, bitrate INT NOT NULL, quality VARCHAR(5) NOT NULL, seglength INT NOT NULL, respstatus INT NOT NULL DEFAULT 0, respbytes INT NOT NULL DEFAULT 0, cachedisp VARCHAR(50) NOT NULL, respduration DECIMAL(16,8) NOT NULL, downstream VARCHAR(20), PRIMARY KEY(`id`), KEY(`test`,`cache`), KEY(`seglength`), KEY(`quality`), KEY(`bitrate`), KEY(`respstatus`), KEY(`cachedisp`) ); Want to add a couple of fields (cache/test) as well as convert the date to an epoch string, so need to be a script rather than a direct import really. select distinct respstatus from logentries; +------------+ | respstatus | +------------+ | 200 | | 206 | | 304 | | 404 | | 499 | | 502 | | 504 | +------------+ 7 rows in set (0.02 sec) mysql> select distinct bitrate, quality,seglength from logentries; +---------+---------+-----------+ | bitrate | quality | seglength | +---------+---------+-----------+ | 0 | 0 | 2 | | 1024 | 1080p | 2 | | 512 | 1080p | 2 | | 2048 | 1080p | 2 | | 0 | 0 | 10 | | 1024 | 1080p | 10 | | 2048 | 1080p | 10 | | 512 | 1080p | 10 | | 1024 | 480p_ | 10 | | 2048 | 480p_ | 10 | | 512 | 480p_ | 10 | +---------+---------+-----------+ 11 rows in set (0.02 sec) mysql> select distinct cachedisp from logentries; +-------------------+ | cachedisp | +-------------------+ | CACHE_- | | CACHE_EXPIRED | | CACHE_HIT | | CACHE_MISS | | CACHE_REVALIDATED | +-------------------+ Cool, happy with that, so will take a dump of the database as a backup and gzip the logs Actually, changed my mind and stripped "test" from the test numbers mysql> update logentries set test = REPLACE(test,'test',''); mysql> select test,cache, seglength, quality, MIN(respduration) as min, MAX(respduration) as max, avg(respduration) as mean from logentries where reqextension='ts' GROUP BY seglength,quality ORDER BY seglength,quality; +------+--------+-----------+---------+------------+-------------+----------------+ | test | cache | seglength | quality | min | max | mean | +------+--------+-----------+---------+------------+-------------+----------------+ | 10 | origin | 2 | 1080p | 0.00000000 | 18.85200000 | 0.879961965135 | | 17 | origin | 10 | 1080p | 0.00000000 | 49.89700000 | 4.508940745266 | | 27 | origin | 10 | 480p_ | 0.00000000 | 74.10600000 | 3.286449047619 | +------+--------+-----------+---------+------------+-------------+----------------+