
(*
 * Logs Filter
 * 
 * by Gamall Wednesday Ida
 * 
 * gamall-ida.com
 * gamall.ida@gmail.com
 * 
 * Copyright 2007, 2008 Gamall Wednesday Ida
 * 
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 *)

(** 
  Generalist logs filter main module.
  
  @author Gamall Wednesday Ida
*)

open Toolkit;;

(** {6 Global parameters} *)

(** What kind of information goes through stderr ? *)
type opacity_t = 
  | OPAQUE       (** nothing is copied to stderr. All original input can be lost *)
  | RESIDUES     (** everything the filter rejected is copied to stderr *) 
  | TRANSPARENT  (** all input data is copied to stderr, regardless of what the filter did with it *)

(** opacity setting *)
let _opacity = ref OPAQUE

(** {6 Predicate and Transform mechanism}
  Using a hash table for internal storage
*)
 
(** Type for predicates *)
type predicate           = string -> bool

(** Type for transformations *)
type transform           = string -> string

(** Record of a registered function: function, short description, help page text *)
type 'funtype record     = 'funtype * string * string

(** Get the function back from a record *) 
let get_fun        = function a, b, c -> a

(** Get the short description from a record *)
let get_shortdesc  = function a, b, c -> b

(** Get the help page text from a record *)
let get_helpt      = function a, b, c -> c
  
(** The predicates hash table *)
let (hp : (string, predicate record) Hashtbl.t) = Hashtbl.create 10

(** The transforms hash table *)
let (ht : (string, transform record) Hashtbl.t) = Hashtbl.create 10

(** Register a function in its table *)
let register_fun htbl = Hashtbl.add htbl

(** Register a predicate *)
let regp p optn desc helpt = register_fun hp optn ((p, desc, helpt): predicate record)

(** Register a tranform *)
let regt t optn desc helpt = register_fun ht optn ((t, desc, helpt): transform record)

(** Get the whole record of a function from its option name 
  @raise Failure and abort if no record can be found *)
let get_record h optn = try Hashtbl.find h optn with Not_found -> 
  failwith (va "\nCould not find requested function '%s'.\nPlease see --help or --funs\n" optn)

(** Get a function from its option name *)
let get h optn = get_fun (get_record h optn)

(** Get predicate from option name *)
let getp = get hp 

(** Get transform from option name *)
let gett = get ht
  
(** {6 Predicates} 
  Predicates determine whether a given line must be kept or discared 
*)

(** {7 Definitions} *)
  
(** Keep all lines *)
let p_all s = true
  
(** Keep none *)
let p_none s = false
  
(** Jka logs public dialogue lines pattern *)  
let reg_jkdlg     = Str.regexp "say: \\(.*\\): \\(.*\\)"

(** Jka logs private dialogue lines pattern *)  
let reg_jkprivdlg = Str.regexp "tell: \\(.*\\) to \\(.*\\): \\(.*\\)"

(** Keep only public dialogues *)
let p_jk_dlgs s = 
  Str.string_match reg_jkdlg s 0

(** Keep all dialogues *)
let p_jk_adlgs s = 
  Str.string_match reg_jkdlg s 0 ||   Str.string_match reg_jkprivdlg s 0

(** Keep only private dialogues *)
let p_jk_pdlgs s = 
  Str.string_match reg_jkprivdlg s 0

(** JK logs events lines pattern *)
let reg_jkevent = Str.regexp "broadcast: print \"\\(.*\\)\\\\n\""

(** Keep JK logs events lines *)
let p_jk_event s = Str.string_match reg_jkevent s 0

(** JK server initialisation message pattern *)
let reg_jkservinit = Str.regexp "Server: \\(.*\\)"

(** Keep JK server initialisation message *)
let p_jk_servinit s = Str.string_match reg_jkservinit s 0
  
(** Q3 userinfo strings containing IPs pattern *)
let reg_q3uiip = Str.regexp ".*\\\\ip\\\\\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+:[0-9]+\\).*"

(** Keep Q3 userinfo strings containing IPs *)
let p_q3uiip s = Str.string_match reg_q3uiip s 0
  
(** {7 Registration} *)

(** Add all predicates to the record *)
let do_register_predicates () = begin
  (*****************************************************************************************)
  regp p_q3uiip           "q3_uiip"  "In Q3 logs, keep lines with userinfo string & ip" "
In games based on the Quake3 Engine, a string called the userinfo string stores
valuable information such as player name and IP address.
  
For instance, \"\\ip\\12.234.22.11:5656\\name\\Gamall\\etc...\" looks like a userinfo 
string.
  
This predicate keeps only the lines where a userinfo strings containing an IP
address appears.
  
Not that this predicate has an homonym transform.";

  (*****************************************************************************************)
  regp p_jk_servinit      "jk_sinit" "In Jedi Knight, keep server initialisation (maps)" "
In Jedi Knight servers, lines such as 
  
  ------ Server Initialization ------
  Server: mp/ffa1
  
indicate the map loaded on the server.
  
This predicate keeps only those lines, yielding the list of the maps loaded 
on the server.";
  
  (*****************************************************************************************)
  regp p_jk_event         "jk_event" "In Jedi Knight, keep localised game events" "
In Jedi Knight servers, some log lines read that way:
-> \"broadcast: print \"Gamall Wednesday Ida @@@PLRENAME Padawan\\n\"\"
The symbol prefixed by @@@ is replaced by localised strings in the game clients.
  
So these lines indicate lots of game events, connections, disconnections, 
kicks and bans, timelimits and much more.
  
This predicate only keeps lines of this form.
  
It has an homonym transform which removes the broadcast instructions from
the actual message.";
  
  (*****************************************************************************************)
  regp p_jk_pdlgs         "jk_pdlgs" "In Jedi Knight, keep private dialogues (tell)" "
In Jedi Knight servers, lines such as
  tell: from_name to to_name: private message
indicate private messages.
    
This predicate only keeps those lines.
    
Note that there are other predicates about dialogues:
See jk_adlgs and jk_dlgs.
  
There is an associated transform: jk_dlgs.";
  
  (*****************************************************************************************)
  regp p_jk_adlgs         "jk_adlgs" "In Jedi Knight, keep all dialogues (say and tell)" "
In Jedi Knight servers, lines such as
  tell: from_name to to_name: private message
indicate private messages, and lines such as
  say: name: message
indicate public messages.
    
This predicate only keeps those lines.
    
Note that there are other predicates about dialogues:
See jk_pdlgs and jk_dlgs.
  
There is an associated transform: jk_dlgs.";
  
  (*****************************************************************************************)
  regp p_jk_dlgs          "jk_dlgs"  "In Jedi Knight, keep only public dialogues (say)" "
In Jedi Knight servers, lines such as
  say: name: message
indicate public messages.
    
This predicate only keeps those lines.
    
Note that there are other predicates about dialogues:
See jk_adlgs and jk_pdlgs.
  
There is an associated transform: jk_dlgs.";
  
  (*****************************************************************************************)
  regp p_none             "none"     "Block all lines. Closed filter" "
This predicate always yields false, which means it keeps no line.
  
It is there only for the sake of completeness, but I doubt it has
any practical use besides debugging.";
  
  (*****************************************************************************************)
  regp p_all              "all"      "Accept all lines. Transparent filter (DEFAULT)" "
This predicate always yields true, which means it keeps all lines.

This is the default predicate, which means that it will be used if you don't 
select another. Then you can use a transform alone.";
end
  
(** {6 Transforms} 
  Transforms turn lines which are kept by the predicate into something more legible
*)

(** The identity transform: no alteration of the original. Default transform. *)
let t_id s = s
  
(** Debug mark *)
let t_debugmark s = "MARKED: " ^ s
 
(** Q3 colours escape sequence pattern *)
let reg_q3color = Str.regexp "\\^[0-9]"

(** Remove Q3 colour escape sequences *)
let remove_q3colors s = Str.global_replace reg_q3color "" s

(** JKA logs dialogue lines pretty-print, private and public *)   
let t_jk_dlgs s = 
  try if Str.string_match reg_jkdlg s 0 then
    let n, l = Str.matched_group 1 s, Str.matched_group 2 s in
    let name = remove_q3colors n and line = remove_q3colors l in
    va "%30s :: %s" name line
  else if Str.string_match reg_jkprivdlg s 0 then
    let n, nn, l = Str.matched_group 1 s, Str.matched_group 2 s, Str.matched_group 3 s in
    let name1 = remove_q3colors n and name2 = remove_q3colors nn and line  = remove_q3colors l in
    va "%45s ~: %s" (va "%s -> %s" name1 name2) line
  else "" with _ -> ""
    
(** JK log events pretty-print *)
let t_jk_event s = 
  try if Str.string_match reg_jkevent s 0 then
    Str.matched_group 1 s
  else "" with _ -> ""
    
(** Cut to 80 chars by the left *)
let t_cut80 s = try Str.first_chars s 80 with _ -> s

(** Cut to 80 chars by the right *)
let t_rcut80 s = try Str.last_chars s 80 with _ -> s
  
(** Separate IP from q3 userinfo strings *)
let t_q3uiip printorig s = 
  try if Str.string_match reg_q3uiip s 0 then
    let ipp = Str.matched_group 1 s in 
    if printorig then va "%22s  <>  %s" ipp s else ipp
  else "" with _ -> ""

(** {7 Registration} *)

(** Add all tranforms to the record *)
let do_register_transforms () = begin
  (*****************************************************************************************)
  regt (t_q3uiip false)   "q3_uioip" "Extracts IP from q3 userinfo string" "
This transform only retains the IP from lines with a 
userinfo string.
  
See q3_uiip if you want to retain a copy of the original line.";

  (*****************************************************************************************)
  regt (t_q3uiip true)    "q3_uiip"  "Separate IP from q3 userinfo string" "
This transform is meant to be used in conjunction with its homonym predicate.
It isolates the IP from a userinfo string, and displays it clearly.
  
The original string is also entirely displayed after the IP.
  
There is another transform which prints the IP alone: q3_uioip";
  
  (*****************************************************************************************)
  regt t_rcut80           "rcut80"   "Truncate to the last  80 characters" "
This transform only keeps the LAST 80 characters of each line, which allows
the resulting text to be printed on any terminal without any line break.
  
Information can be lost, though.
  
This transform has a sister: see cut80";

  (*****************************************************************************************)
  regt t_cut80            "cut80"    "Truncate to the first 80 characters" "
This transform only keeps the FIRST 80 characters of each line, which allows
the resulting text to be printed on any terminal without any line break.
  
Information can be lost, though.
  
This transform has a sister: see rcut80";

  (*****************************************************************************************)
  regt t_jk_event         "jk_event" "Jedi Knight localised event trim" "
This transform removes the \"broadcast: print\" instruction
and the quotes from lines containing a localised game event.
  
See the homonym predicate for more information.";

  (*****************************************************************************************)
  regt remove_q3colors    "noq3cols" "Remove Quake3 colour escapes" "
In Quake3-based games, the character ^ is the 'colour escape' which, combined
with a number, changes the current colour. 
    
Of course, since the colours won't be displayed in text logs, these can
become an annoyance.
  
These transform removes any colour escape it finds. For instance
  ^5G^7amall ^5W^7ednesday ^5I^7da^7
becomes
  Gamall Wednesday Ida.";
  
  (*****************************************************************************************)
  regt t_jk_dlgs          "jk_dlgs"  "Jedi Knight dialogue line pretty-print" "
This transform is to be used with the jk_dlgs, jk_adlgs and jk_pdlgs 
predicates.
  
It separates the names and the dialogues very clearly, using different templates
for public and private messages.
  
Tip: rather than use 
  -t jk_dlgs -p jk_adlgs
, you can use
  -t jk_dlgs --no-blank
  
Since this transform will yield a blank line if it fails, those two commands
are identical, except the second one only matches the regular expressions
one time, while the first does it twice. So the second command is a tad faster
than the first.";
  
  (*****************************************************************************************)
  regt t_debugmark        "mark"     "Debug mark. Prepends a mark" "
This basic transform will just add 'MARKED: ' at the beginning of its input.
  
It has no practical use except for debugging the program.";
  
  (*****************************************************************************************)
  regt String.uppercase   "upcase"   "To uppercase" "
This transform turns its input to uppercase.
  
I don't think it is of much use, but I put it here for the sake of example, to 
show how easy it is to add a transform to the program.
  
Besides, this was one of the example filters I shipped with SIS.";
  
  (*****************************************************************************************)
  regt String.lowercase   "lcase"    "To lowercase" "
This transform turns its input to lowercase.
  
Since I has included uppercase (see upcase), I couldn't possibly omit 
lowercase...";
  
  (*****************************************************************************************)
  regt t_id               "id"       "Identity. lines are unaltered (DEFAULT)" "
This transform is the identity: its input is unaltered.
  
This is the default transform, which means it will be used unless you select
another one.
  
This allows you to use the filter with a predicate alone."
end
 

(** {6 Entry point} *)
  
    
(** Welcome message *)
let welcome = "
** Logs Filter v0.1
** by Gamall Wednesday Ida
** email : gamall.ida@gmail.com
** web   : gamall-ida.com
"
(** Display the welcome message *)
let disp_welcome () = pl welcome

(** Help poor dear little user through the command-line options... *)
let disp_help () = disp_welcome () ; pl "
usage: sis [options]
 -p  <str>     : validation predicate
 -t  <str>     : validation transform
 -rp <str>     : rejection predicate
 -rt <str>     : rejection transform
 -o  <str>     : select an opacity. Values are
                 Opaque/Residues/Transparent
 --no-blank    : don't print blank lines
 --help        : print this help and exit
 --help <str>  : print the documentation for a function
 --predicates  : print the list of all valid predicates
 --transforms  : print the list of all valid transforms
 --funs        : print the list of all valid functions
 --docu        : print the whole documentation of all functions
   
"

(** Display function and short description *)
let disp_fun optn fd = pf "%10s : %s\n" optn (get_shortdesc fd)

(** Display a whole functions table *)
let disp_table title h = pl title ; Hashtbl.iter disp_fun h

(** Display all valid predicates *)
let disp_preds () = disp_table (va "PREDICATES: [%2d]" (Hashtbl.length hp)) hp

(** Display all valid transforms *)
let disp_trans () = disp_table (va "TRANSFORMS: [%2d]" (Hashtbl.length ht)) ht
  
(** Print the help page from an optn and a record *)
let print_help_page optn record =
  let sd, hpa = get_shortdesc record, get_helpt record in
  pf "\n%10s : %s\n%s%s\n%s\n" optn sd separator_line hpa separator_line
  
(** The same as [print_help_page] with extra blank lines *)
let print_help_page_line optn record = pl "\n"; print_help_page optn record
  
(** Display a help page concerning a function, identified by its option name *)
let request_help_page optn = let fails = ref 0 in pf "Help page on '%s':\n" optn;
  begin 
    try let therec = Hashtbl.find hp optn in pl "\nPREDICATE:"; print_help_page optn therec 
    with Not_found -> fails := succ !fails
  end; begin 
    try let therec = Hashtbl.find ht optn in pl "\nTRANSFORM:"; print_help_page optn therec 
    with Not_found -> fails := succ !fails
  end; if !fails >= 2 then pl "This function is neither a predicate nor a transform.\nSee --funs"; pl ""
          
(** Display the whole documentation of all functions *)
let disp_doc () =
  pl "DOCUMENTATION OF ALL FUNCTIONS"; pl separator_double_line ; pl "";
  pl "\n\nPREDICATES:"; pl separator_double_line; Hashtbl.iter print_help_page_line hp;
  pl "\n\nTRANSFORMS:"; pl separator_double_line; Hashtbl.iter print_help_page_line ht
  
(** Must I remove blank lines from the output ? This is a kind of bonus filter... *)
let _no_blank = ref false
  
(** CORE FUNCTION : Waits for input on stdin, and decides whether to send it to stdout and/or stderr, with respect to user selected opacity setting, predicate and transformation *)
let do_filter p t rp rt = 
  let do_output s = if not !_no_blank || s <> "" then pl s in
  let do_reject s = if rp s then pe (rt s) in
  while true do 
    let l = read_line () in let pass = p l in if pass then do_output (t l);
    match !_opacity with
      | OPAQUE -> ()
      | RESIDUES -> if not pass then do_reject l
      | TRANSPARENT -> do_reject l
  done
  
(** Get the opacity setting from user command-line string *)
let get_opacity optn = match String.lowercase optn with
  | ("o"|"opaque") -> OPAQUE
  | ("r"|"residues") -> RESIDUES
  | ("t"|"transparent") -> TRANSPARENT
  | x -> failwith (va "\nOpacity value '%s' is invalid.\nPlease see --help\n" x)
  
(** Selected predicate *)
let _p  = ref "all"

(** Selected transform *)
let _t  = ref "id"

(** Selected predicate for lines rejected to [stderr] *)
let _rp = ref "all"

(** Selected transform for lines rejected to [stderr] *)
let _rt = ref "id"
  
(** Register everything... *)
let init () = (do_register_predicates (); do_register_transforms ())
  
(** Parse the command-line *)
let rec pcml = function
  | [] -> ()
  | "-p"::optn::l -> _p := optn ; pcml l
  | "-t"::optn::l -> _t := optn ; pcml l 
  | "-rp"::optn::l -> _rp := optn ; pcml l
  | "-rt"::optn::l -> _rt := optn ; pcml l 
  | "-o"::optn::l -> _opacity := get_opacity optn ; pcml l
  | "--no-blank"::l -> _no_blank := true ; pcml l
  | "--help"::[s] -> init(); disp_welcome (); request_help_page s ; exit 0
  | "--help"::l -> disp_help(); exit 0
  | "--docu"::l -> init(); disp_welcome (); disp_doc(); exit 0
    (* The Third clue is an Arch *)
  | "--predicates"::l -> init(); disp_welcome (); disp_preds () ; exit 0
  | "--transforms"::l -> init(); disp_welcome (); disp_trans () ; exit 0
  | "--funs"::l -> init (); disp_welcome (); disp_preds () ; pl "" ; disp_trans () ; pl ""; exit 0
  | _ -> bad_args () ; failwith "Please correct the command-line and try again... (see --help)"
 
(** Entry point *)
let main () = begin
  pcml (tl al);
  init ();
  do_filter (getp !_p) (gett !_t) (getp !_rp) (gett !_rt)
end
  
(** Entry point with neutralised [End_of_file] *)
let _ = try main() with
  | End_of_file -> exit 0

