···11(* Entity table generator for html5rw.
22 Reads WHATWG entities.json and generates OCaml code. *)
3344+(* Helper functions for jsont *)
55+let json_object = function
66+ | Jsont.Object (obj, _) -> obj
77+ | _ -> failwith "Expected JSON object"
88+99+let json_array = function
1010+ | Jsont.Array (arr, _) -> arr
1111+ | _ -> failwith "Expected JSON array"
1212+1313+let json_number = function
1414+ | Jsont.Number (n, _) -> int_of_float n
1515+ | _ -> failwith "Expected JSON number"
1616+1717+let json_mem name obj =
1818+ match List.find_opt (fun ((n, _), _) -> n = name) obj with
1919+ | Some (_, v) -> Some v
2020+ | None -> None
2121+422let () =
523 let json_file = Sys.argv.(1) in
624 let out_file = Sys.argv.(2) in
···1028 let s = really_input_string ic n in
1129 close_in ic;
12301313- let json = Yojson.Basic.from_string s in
3131+ let json = match Jsont_bytesrw.decode_string Jsont.json s with
3232+ | Ok j -> j
3333+ | Error e -> failwith (Printf.sprintf "JSON parse error: %s" e)
3434+ in
14351536 let oc = open_out out_file in
1637···2445 let entities = ref [] in
2546 let legacy = ref [] in
26472727- (match json with
2828- | `Assoc entries ->
2929- List.iter (fun (name, value) ->
3030- (* name is like "&" or "&" *)
3131- let name_without_amp =
3232- if String.length name > 0 && name.[0] = '&' then
3333- String.sub name 1 (String.length name - 1)
3434- else name
3535- in
3636- let has_semicolon =
3737- String.length name_without_amp > 0 &&
3838- name_without_amp.[String.length name_without_amp - 1] = ';'
3939- in
4040- let key =
4141- if has_semicolon then
4242- String.sub name_without_amp 0 (String.length name_without_amp - 1)
4343- else
4444- name_without_amp
4545- in
4646- (match value with
4747- | `Assoc fields ->
4848- let codepoints =
4949- match List.assoc_opt "codepoints" fields with
5050- | Some (`List cps) ->
5151- List.map (function `Int i -> i | _ -> 0) cps
5252- | _ -> []
5353- in
5454- if codepoints <> [] then begin
5555- entities := (key, codepoints, has_semicolon) :: !entities;
5656- (* Legacy entities are those that appear without semicolon in the JSON *)
5757- if not has_semicolon then
5858- legacy := key :: !legacy
5959- end
6060- | _ -> ())
6161- ) entries
6262- | _ -> failwith "Expected JSON object");
4848+ let entries = json_object json in
4949+ List.iter (fun ((name, _), value) ->
5050+ (* name is like "&" or "&" *)
5151+ let name_without_amp =
5252+ if String.length name > 0 && name.[0] = '&' then
5353+ String.sub name 1 (String.length name - 1)
5454+ else name
5555+ in
5656+ let has_semicolon =
5757+ String.length name_without_amp > 0 &&
5858+ name_without_amp.[String.length name_without_amp - 1] = ';'
5959+ in
6060+ let key =
6161+ if has_semicolon then
6262+ String.sub name_without_amp 0 (String.length name_without_amp - 1)
6363+ else
6464+ name_without_amp
6565+ in
6666+ let fields = json_object value in
6767+ let codepoints =
6868+ match json_mem "codepoints" fields with
6969+ | Some arr -> List.map json_number (json_array arr)
7070+ | None -> []
7171+ in
7272+ if codepoints <> [] then begin
7373+ entities := (key, codepoints, has_semicolon) :: !entities;
7474+ (* Legacy entities are those that appear without semicolon in the JSON *)
7575+ if not has_semicolon then
7676+ legacy := key :: !legacy
7777+ end
7878+ ) entries;
63796480 (* Remove duplicates - prefer semicolon version *)
6581 let seen = Hashtbl.create 2500 in