···66open Cmdliner
7788let psl = lazy (Publicsuffix.create ())
99-109let psl () = Lazy.force psl
11101211(* Helper functions for printing results *)
13121414-let print_error e =
1515- Printf.printf "ERROR: %s\n" (Publicsuffix.error_to_string e)
1616-1717-let print_result = function
1818- | Ok s -> print_endline s
1919- | Error e -> print_error e
1313+let print_error e = Printf.printf "ERROR: %s\n" (Publicsuffix.error_to_string e)
1414+let print_result = function Ok s -> print_endline s | Error e -> print_error e
20152116let print_bool_result = function
2217 | Ok b -> print_endline (string_of_bool b)
···24192520let print_result_with_section = function
2621 | Ok (s, sec) ->
2727- let sec_str = match sec with
2222+ let sec_str =
2323+ match sec with
2824 | Publicsuffix.ICANN -> "ICANN"
2925 | Publicsuffix.Private -> "PRIVATE"
3026 in
···5955 let doc = "Check if a domain is a registrable domain" in
6056 let info = Cmd.info "is_registrable" ~doc in
6157 let term =
6262- Term.(const print_bool_result $ Publicsuffix_cmd.is_registrable_term (psl ()))
5858+ Term.(
5959+ const print_bool_result $ Publicsuffix_cmd.is_registrable_term (psl ()))
6360 in
6461 Cmd.v info term
6562···6764 let doc = "Get the registrable domain with section information" in
6865 let info = Cmd.info "registrable_section" ~doc in
6966 let term =
7070- Term.(const print_result_with_section
6767+ Term.(
6868+ const print_result_with_section
7169 $ Publicsuffix_cmd.registrable_section_term (psl ()))
7270 in
7371 Cmd.v info term
···7674 let doc = "Get the public suffix with section information" in
7775 let info = Cmd.info "suffix_section" ~doc in
7876 let term =
7979- Term.(const print_result_with_section
7777+ Term.(
7878+ const print_result_with_section
8079 $ Publicsuffix_cmd.suffix_section_term (psl ()))
8180 in
8281 Cmd.v info term
···8584 let doc = "Print statistics about the Public Suffix List" in
8685 let info = Cmd.info "stats" ~doc in
8786 let term =
8888- Term.(const (fun (total, icann, private_rules) ->
8989- Printf.printf "Total rules: %d\n" total;
9090- Printf.printf "ICANN rules: %d\n" icann;
9191- Printf.printf "Private rules: %d\n" private_rules)
8787+ Term.(
8888+ const (fun (total, icann, private_rules) ->
8989+ Printf.printf "Total rules: %d\n" total;
9090+ Printf.printf "ICANN rules: %d\n" icann;
9191+ Printf.printf "Private rules: %d\n" private_rules)
9292 $ Publicsuffix_cmd.stats_term (psl ()))
9393 in
9494 Cmd.v info term
···9797 let doc = "Print version information about the Public Suffix List data" in
9898 let info = Cmd.info "version" ~doc in
9999 let term =
100100- Term.(const (fun (version, commit) ->
101101- Printf.printf "Version: %s\n" version;
102102- Printf.printf "Commit: %s\n" commit)
100100+ Term.(
101101+ const (fun (version, commit) ->
102102+ Printf.printf "Version: %s\n" version;
103103+ Printf.printf "Commit: %s\n" commit)
103104 $ Publicsuffix_cmd.version_term (psl ()))
104105 in
105106 Cmd.v info term
···108109 let doc = "Query the Public Suffix List" in
109110 let sdocs = Manpage.s_common_options in
110111 let info = Cmd.info "publicsuffix" ~version:"%%VERSION%%" ~doc ~sdocs in
111111- Cmd.group info [
112112- registrable_cmd;
113113- suffix_cmd;
114114- is_suffix_cmd;
115115- is_registrable_cmd;
116116- registrable_section_cmd;
117117- suffix_section_cmd;
118118- stats_cmd;
119119- version_cmd;
120120- ]
112112+ Cmd.group info
113113+ [
114114+ registrable_cmd;
115115+ suffix_cmd;
116116+ is_suffix_cmd;
117117+ is_registrable_cmd;
118118+ registrable_section_cmd;
119119+ suffix_section_cmd;
120120+ stats_cmd;
121121+ version_cmd;
122122+ ]
121123122124let () = exit (Cmd.eval default_cmd)
···2626(** Rule types *)
2727type rule_type = Normal | Wildcard | Exception
28282929-(** A parsed rule *)
3029type rule = {
3131- labels : string list; (* Labels in reverse order: ["uk"; "co"] for co.uk *)
3030+ labels : string list; (* Labels in reverse order: ["uk"; "co"] for co.uk *)
3231 rule_type : rule_type;
3332 section : section;
3433}
3434+(** A parsed rule *)
35353636-(** Trie node for efficient lookup *)
3736type trie_node = {
3837 id : int; (* Unique identifier for this node *)
3938 mutable rule : (rule_type * section) option;
4039 mutable children : (string * trie_node) list;
4140 mutable wildcard_child : trie_node option;
4241}
4242+(** Trie node for efficient lookup *)
43434444let node_id_counter = ref 0
4545···5353 (* Strip comments (looking for //) *)
5454 let line =
5555 match String.index_opt line '/' with
5656- | Some i when i > 0 && line.[i-1] = '/' -> String.sub line 0 (i-1)
5656+ | Some i when i > 0 && line.[i - 1] = '/' -> String.sub line 0 (i - 1)
5757 | Some 0 -> ""
5858 | _ -> line
5959 in
6060 (* Take only up to first whitespace and trim *)
6161 let line =
6262- String.trim line
6363- |> fun s ->
6464- match String.index_from_opt s 0 ' ', String.index_from_opt s 0 '\t' with
6262+ String.trim line |> fun s ->
6363+ match (String.index_from_opt s 0 ' ', String.index_from_opt s 0 '\t') with
6564 | Some i, Some j -> String.sub s 0 (min i j)
6665 | Some i, None | None, Some i -> String.sub s 0 i
6766 | None, None -> s
···7574 (Exception, String.sub line 1 (String.length line - 1))
7675 else if String.length line > 2 && line.[0] = '*' && line.[1] = '.' then
7776 (Wildcard, String.sub line 2 (String.length line - 2))
7878- else
7979- (Normal, line)
7777+ else (Normal, line)
8078 in
8179 (* Process labels: split, reverse, filter, and encode *)
8280 let labels =
···8886 | Ok encoded -> String.lowercase_ascii encoded
8987 | Error _ -> String.lowercase_ascii label)
9088 in
9191- if labels = [] then None
9292- else Some { labels; rule_type; section }
8989+ if labels = [] then None else Some { labels; rule_type; section }
93909491(** Insert a rule into the trie *)
9592let insert_rule trie rule =
···108105 c
109106 in
110107 child.rule <- Some (Wildcard, rule.section)
111111- end else
112112- node.rule <- Some (rule.rule_type, rule.section)
108108+ end
109109+ else node.rule <- Some (rule.rule_type, rule.section)
113110 | label :: rest ->
114111 (* Find or create child for this label *)
115112 let child =
···144141 (* Helper to extract value after "KEY: " pattern *)
145142 let extract_value line prefix =
146143 let prefix_len = String.length prefix in
147147- if String.length line > prefix_len &&
148148- String.sub line 0 prefix_len = prefix then
149149- Some (String.trim (String.sub line prefix_len (String.length line - prefix_len)))
150150- else
151151- None
144144+ if String.length line > prefix_len && String.sub line 0 prefix_len = prefix
145145+ then
146146+ Some
147147+ (String.trim
148148+ (String.sub line prefix_len (String.length line - prefix_len)))
149149+ else None
152150 in
153151 try
154152 while true do
155153 let line = input_line ic in
156154 (* Check for version and commit info *)
157157- if !version = None then
158158- version := extract_value line "// VERSION: ";
159159- if !commit = None then
160160- commit := extract_value line "// COMMIT: ";
155155+ if !version = None then version := extract_value line "// VERSION: ";
156156+ if !commit = None then commit := extract_value line "// COMMIT: ";
161157 (* Check for section markers *)
162158 if contains_substring line "===BEGIN ICANN DOMAINS===" then
163159 current_section := ICANN
164160 else if contains_substring line "===BEGIN PRIVATE DOMAINS===" then
165161 current_section := Private
166162 else
167167- Option.iter (fun rule ->
163163+ Option.iter
164164+ (fun rule ->
168165 insert_rule trie rule;
169166 incr rule_count;
170167 if rule.section = ICANN then incr icann_count
171171- else incr private_count
172172- ) (parse_line !current_section line)
168168+ else incr private_count)
169169+ (parse_line !current_section line)
173170 done;
174171 (trie, !rule_count, !icann_count, !private_count, !version, !commit)
175172 with End_of_file ->
···179176(** Escape a string for OCaml source code *)
180177let escape_string s =
181178 let b = Buffer.create (String.length s * 2) in
182182- String.iter (fun c ->
183183- match c with
184184- | '"' -> Buffer.add_string b "\\\""
185185- | '\\' -> Buffer.add_string b "\\\\"
186186- | '\n' -> Buffer.add_string b "\\n"
187187- | '\r' -> Buffer.add_string b "\\r"
188188- | '\t' -> Buffer.add_string b "\\t"
189189- | c when Char.code c < 32 || Char.code c > 126 ->
190190- (* For non-ASCII, we keep the UTF-8 bytes as-is since OCaml handles UTF-8 strings *)
191191- Buffer.add_char b c
192192- | c -> Buffer.add_char b c
193193- ) s;
179179+ String.iter
180180+ (fun c ->
181181+ match c with
182182+ | '"' -> Buffer.add_string b "\\\""
183183+ | '\\' -> Buffer.add_string b "\\\\"
184184+ | '\n' -> Buffer.add_string b "\\n"
185185+ | '\r' -> Buffer.add_string b "\\r"
186186+ | '\t' -> Buffer.add_string b "\\t"
187187+ | c when Char.code c < 32 || Char.code c > 126 ->
188188+ (* For non-ASCII, we keep the UTF-8 bytes as-is since OCaml handles UTF-8 strings *)
189189+ Buffer.add_char b c
190190+ | c -> Buffer.add_char b c)
191191+ s;
194192 Buffer.contents b
195193196194(** Generate OCaml code for the trie *)
197195let generate_code trie rule_count icann_count private_count version commit =
198196 (* Print header *)
199199- print_string {|(* Auto-generated from public_suffix_list.dat - DO NOT EDIT *)
197197+ print_string
198198+ {|(* Auto-generated from public_suffix_list.dat - DO NOT EDIT *)
200199(* This file contains the parsed Public Suffix List as OCaml data structures *)
201200202201(** Section of the PSL where a rule originates *)
···239238240239 let rec generate_node node =
241240 let node_id = node.id in
242242- if Hashtbl.mem generated node_id then
243243- Hashtbl.find node_names node_id
241241+ if Hashtbl.mem generated node_id then Hashtbl.find node_names node_id
244242 else begin
245243 (* First generate all children *)
246244 List.iter (fun (_, child) -> ignore (generate_node child)) node.children;
247247- Option.iter (fun child -> ignore (generate_node child)) node.wildcard_child;
245245+ Option.iter
246246+ (fun child -> ignore (generate_node child))
247247+ node.wildcard_child;
248248249249 let name = Hashtbl.find node_names node_id in
250250···253253254254 (* Rule field *)
255255 (match node.rule with
256256- | None -> Buffer.add_string output_buffer " rule = None;\n"
257257- | Some (rt, sec) ->
258258- let rt_str = match rt with
259259- | Normal -> "Normal"
260260- | Wildcard -> "Wildcard"
261261- | Exception -> "Exception"
262262- in
263263- let sec_str = match sec with ICANN -> "ICANN" | Private -> "Private" in
264264- Buffer.add_string output_buffer
265265- (Printf.sprintf " rule = Some (%s, %s);\n" rt_str sec_str));
256256+ | None -> Buffer.add_string output_buffer " rule = None;\n"
257257+ | Some (rt, sec) ->
258258+ let rt_str =
259259+ match rt with
260260+ | Normal -> "Normal"
261261+ | Wildcard -> "Wildcard"
262262+ | Exception -> "Exception"
263263+ in
264264+ let sec_str =
265265+ match sec with ICANN -> "ICANN" | Private -> "Private"
266266+ in
267267+ Buffer.add_string output_buffer
268268+ (Printf.sprintf " rule = Some (%s, %s);\n" rt_str sec_str));
266269267270 (* Children field *)
268271 if node.children = [] then
269272 Buffer.add_string output_buffer " children = [];\n"
270273 else begin
271274 Buffer.add_string output_buffer " children = [\n";
272272- List.iter (fun (label, child) ->
273273- let child_name = Hashtbl.find node_names child.id in
274274- Buffer.add_string output_buffer
275275- (Printf.sprintf " (\"%s\", %s);\n" (escape_string label) child_name)
276276- ) node.children;
275275+ List.iter
276276+ (fun (label, child) ->
277277+ let child_name = Hashtbl.find node_names child.id in
278278+ Buffer.add_string output_buffer
279279+ (Printf.sprintf " (\"%s\", %s);\n" (escape_string label)
280280+ child_name))
281281+ node.children;
277282 Buffer.add_string output_buffer " ];\n"
278283 end;
279284280285 (* Wildcard child field *)
281286 (match node.wildcard_child with
282282- | None -> Buffer.add_string output_buffer " wildcard_child = None;\n"
283283- | Some child ->
284284- let child_name = Hashtbl.find node_names child.id in
285285- Buffer.add_string output_buffer
286286- (Printf.sprintf " wildcard_child = Some %s;\n" child_name));
287287+ | None -> Buffer.add_string output_buffer " wildcard_child = None;\n"
288288+ | Some child ->
289289+ let child_name = Hashtbl.find node_names child.id in
290290+ Buffer.add_string output_buffer
291291+ (Printf.sprintf " wildcard_child = Some %s;\n" child_name));
287292288293 Buffer.add_string output_buffer "}\n\n";
289294···297302 Printf.printf "let root = %s\n" root_name;
298303299304 (* Generate helper to get the root *)
300300- print_string {|
305305+ print_string
306306+ {|
301307(** Get the root of the suffix trie *)
302308let get_root () = root
303309···316322 exit 1
317323 end;
318324 let filename = Sys.argv.(1) in
319319- let trie, rule_count, icann_count, private_count, version, commit = parse_file filename in
325325+ let trie, rule_count, icann_count, private_count, version, commit =
326326+ parse_file filename
327327+ in
320328 (* Ensure version and commit are present *)
321321- let version = match version with
329329+ let version =
330330+ match version with
322331 | Some v -> v
323332 | None ->
324333 Printf.eprintf "ERROR: VERSION not found in %s\n" filename;
325334 exit 1
326335 in
327327- let commit = match commit with
336336+ let commit =
337337+ match commit with
328338 | Some c -> c
329339 | None ->
330340 Printf.eprintf "ERROR: COMMIT not found in %s\n" filename;
···66(** Reusable Cmdliner terms for the publicsuffix library.
7788 This module provides argument parsers and term functions that can be
99- composed to build command-line tools that work with the Public Suffix List. *)
99+ composed to build command-line tools that work with the Public Suffix List.
1010+*)
10111112(** {1 Argument terms} *)
1213···4647 (total_rules, icann_rules, private_rules). *)
47484849val version_term : Publicsuffix.t -> (string * string) Cmdliner.Term.t
4949-(** Term that returns version information about the Public Suffix List as a tuple of
5050- (version, commit). *)
5050+(** Term that returns version information about the Public Suffix List as a
5151+ tuple of (version, commit). *)
+59-61
lib/publicsuffix.ml
···2424(* Bring the trie_node type and its fields into scope *)
2525open Publicsuffix_data
26262727-type t = {
2828- root : trie_node;
2929-}
2727+type t = { root : trie_node }
30283129type error =
3230 | Empty_domain
···4240 | Leading_dot -> Format.fprintf fmt "Domain has a leading dot"
4341 | Punycode_error s -> Format.fprintf fmt "Punycode conversion error: %s" s
4442 | No_public_suffix -> Format.fprintf fmt "No public suffix found"
4545- | Domain_is_public_suffix -> Format.fprintf fmt "Domain is itself a public suffix"
4343+ | Domain_is_public_suffix ->
4444+ Format.fprintf fmt "Domain is itself a public suffix"
46454747-let error_to_string err =
4848- Format.asprintf "%a" pp_error err
4949-5050-let create () =
5151- { root = Publicsuffix_data.get_root () }
4646+let error_to_string err = Format.asprintf "%a" pp_error err
4747+let create () = { root = Publicsuffix_data.get_root () }
52485349(* Find a child node by label (case-insensitive) *)
5450let find_child (node : trie_node) label =
5551 let label_lower = String.lowercase_ascii label in
5656- List.find_opt (fun (l, _) -> String.lowercase_ascii l = label_lower) node.children
5252+ List.find_opt
5353+ (fun (l, _) -> String.lowercase_ascii l = label_lower)
5454+ node.children
5755 |> Option.map snd
58565959-(** Result of matching a domain against the trie *)
6057type match_result = {
6161- matched_labels : int; (* Number of labels matched *)
6262- section : section; (* Section of the rule *)
6363- is_exception : bool; (* Whether this is an exception rule *)
5858+ matched_labels : int; (* Number of labels matched *)
5959+ section : section; (* Section of the rule *)
6060+ is_exception : bool; (* Whether this is an exception rule *)
6461}
6262+(** Result of matching a domain against the trie *)
65636666-(** Find all matching rules for a domain.
6767- Labels should be in reverse order (TLD first). *)
6464+(** Find all matching rules for a domain. Labels should be in reverse order (TLD
6565+ first). *)
6866let find_matches (root : trie_node) labels =
6967 let matches = ref [] in
70687169 (* Track whether we matched the implicit * rule *)
7272- let implicit_match = {
7373- matched_labels = 1;
7474- section = ICANN; (* Implicit rule is considered ICANN *)
7575- is_exception = false;
7676- } in
7070+ let implicit_match =
7171+ {
7272+ matched_labels = 1;
7373+ section = ICANN;
7474+ (* Implicit rule is considered ICANN *)
7575+ is_exception = false;
7676+ }
7777+ in
77787879 let rec traverse (node : trie_node) depth remaining_labels =
7980 (* Check if current node has a rule *)
8080- Option.iter (fun (rt, sec) ->
8181- let m = {
8282- matched_labels = depth;
8383- section = sec;
8484- is_exception = (rt = Exception);
8585- } in
8686- matches := m :: !matches
8787- ) node.rule;
8181+ Option.iter
8282+ (fun (rt, sec) ->
8383+ let m =
8484+ {
8585+ matched_labels = depth;
8686+ section = sec;
8787+ is_exception = rt = Exception;
8888+ }
8989+ in
9090+ matches := m :: !matches)
9191+ node.rule;
88928993 (* Continue traversing if we have more labels *)
9094 match remaining_labels with
···9397 (* Check for wildcard match *)
9498 node.wildcard_child
9599 |> Option.iter (fun wc ->
9696- Option.iter (fun (rt, sec) ->
9797- let m = {
9898- matched_labels = depth + 1;
9999- section = sec;
100100- is_exception = (rt = Exception);
101101- } in
102102- matches := m :: !matches
103103- ) wc.rule);
100100+ Option.iter
101101+ (fun (rt, sec) ->
102102+ let m =
103103+ {
104104+ matched_labels = depth + 1;
105105+ section = sec;
106106+ is_exception = rt = Exception;
107107+ }
108108+ in
109109+ matches := m :: !matches)
110110+ wc.rule);
104111105112 (* Check for exact label match *)
106113 find_child node label
···110117 traverse root 0 labels;
111118112119 (* If no matches, return the implicit * rule *)
113113- if !matches = [] then [implicit_match]
114114- else !matches
120120+ if !matches = [] then [ implicit_match ] else !matches
115121116116-(** Select the prevailing rule from a list of matches.
117117- Per the algorithm:
118118- 1. Exception rules take priority
119119- 2. Otherwise, the rule with the most labels wins
120120-*)
122122+(** Select the prevailing rule from a list of matches. Per the algorithm: 1.
123123+ Exception rules take priority 2. Otherwise, the rule with the most labels
124124+ wins *)
121125let select_prevailing_rule matches =
122126 match List.find_opt (fun m -> m.is_exception) matches with
123123- | Some ex -> ex (* Exception rules take priority *)
127127+ | Some ex -> ex (* Exception rules take priority *)
124128 | None ->
125129 (* Find the rule with the most labels *)
126126- List.fold_left (fun best m ->
127127- if m.matched_labels > best.matched_labels then m else best
128128- ) (List.hd matches) matches
130130+ List.fold_left
131131+ (fun best m ->
132132+ if m.matched_labels > best.matched_labels then m else best)
133133+ (List.hd matches) matches
129134130135(** Normalize a domain for lookup:
131136 - Convert to lowercase
132137 - Convert IDN to Punycode
133138 - Split into labels
134134- - Handle trailing dots
135135-*)
139139+ - Handle trailing dots *)
136140let normalize_domain domain =
137141 if domain = "" then Error Empty_domain
138142 else if String.length domain > 0 && domain.[0] = '.' then Error Leading_dot
···142146 String.length domain > 0 && domain.[String.length domain - 1] = '.'
143147 in
144148 let domain =
145145- if has_trailing_dot then
146146- String.sub domain 0 (String.length domain - 1)
149149+ if has_trailing_dot then String.sub domain 0 (String.length domain - 1)
147150 else domain
148151 in
149152 if domain = "" then Error Empty_domain
···171174(** Take the rightmost n elements from a list *)
172175let take_last n lst =
173176 let len = List.length lst in
174174- if len <= n then lst
175175- else List.filteri (fun i _ -> i >= len - n) lst
177177+ if len <= n then lst else List.filteri (fun i _ -> i >= len - n) lst
176178177179(** Calculate the number of public suffix labels from a prevailing rule *)
178180let suffix_label_count prevailing =
179181 if prevailing.is_exception then
180182 (* Exception rules: remove leftmost label from the rule *)
181183 prevailing.matched_labels - 1
182182- else
183183- prevailing.matched_labels
184184+ else prevailing.matched_labels
184185185186(** Find the prevailing rule for a domain *)
186187let find_prevailing_rule t labels =
···194195 | Ok (labels, has_trailing_dot) ->
195196 let prevailing = find_prevailing_rule t labels in
196197 let count = suffix_label_count prevailing in
197197- if count > List.length labels then
198198- Error No_public_suffix
198198+ if count > List.length labels then Error No_public_suffix
199199 else
200200 let suffix_labels = take_last count labels in
201201 let suffix = labels_to_domain suffix_labels has_trailing_dot in
···212212 let count = suffix_label_count prevailing in
213213 (* Registrable domain = suffix + 1 label *)
214214 let reg_label_count = count + 1 in
215215- if reg_label_count > List.length labels then
216216- Error Domain_is_public_suffix
215215+ if reg_label_count > List.length labels then Error Domain_is_public_suffix
217216 else
218217 let reg_labels = take_last reg_label_count labels in
219218 let reg_domain = labels_to_domain reg_labels has_trailing_dot in
···244243let rule_count _t = Publicsuffix_data.rule_count
245244let icann_rule_count _t = Publicsuffix_data.icann_rule_count
246245let private_rule_count _t = Publicsuffix_data.private_rule_count
247247-248246let version _t = Publicsuffix_data.version
249247let commit _t = Publicsuffix_data.commit
+112-117
lib/publicsuffix.mli
···5566(** Public Suffix List implementation for OCaml
7788- This library provides functions to query the Mozilla Public Suffix List (PSL)
99- to determine public suffixes and registrable domains. It implements the
1010- algorithm specified at {{:https://publicsuffix.org/list/} publicsuffix.org}.
88+ This library provides functions to query the Mozilla Public Suffix List
99+ (PSL) to determine public suffixes and registrable domains. It implements
1010+ the algorithm specified at
1111+ {{:https://publicsuffix.org/list/} publicsuffix.org}.
11121213 {1 Overview}
13141415 The Public Suffix List is a cross-vendor initiative to provide an accurate
1515- list of domain name suffixes under which Internet users can directly register
1616- names. A "public suffix" is one under which Internet users can register names.
1717- Some examples of public suffixes are [.com], [.co.uk] and [.pvt.k12.ma.us].
1616+ list of domain name suffixes under which Internet users can directly
1717+ register names. A "public suffix" is one under which Internet users can
1818+ register names. Some examples of public suffixes are [.com], [.co.uk] and
1919+ [.pvt.k12.ma.us].
18201919- The "registrable domain" is the public suffix plus one additional label.
2020- For example, for the domain [www.example.com], the public suffix is [.com]
2121- and the registrable domain is [example.com].
2121+ The "registrable domain" is the public suffix plus one additional label. For
2222+ example, for the domain [www.example.com], the public suffix is [.com] and
2323+ the registrable domain is [example.com].
22242323- Domain names follow the specifications in {{:https://datatracker.ietf.org/doc/html/rfc1034}RFC 1034}
2424- and {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}, which define
2525- the Domain Name System concepts and implementation.
2525+ Domain names follow the specifications in
2626+ {{:https://datatracker.ietf.org/doc/html/rfc1034}RFC 1034} and
2727+ {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}, which define the
2828+ Domain Name System concepts and implementation.
26292730 {1 Sections}
2831···5053 let psl = Publicsuffix.create () in
51545255 (* Get the public suffix of a domain *)
5353- Publicsuffix.public_suffix psl "www.example.com"
5454- (* Returns: Ok "com" *)
5555-5656- Publicsuffix.public_suffix psl "www.example.co.uk"
5757- (* Returns: Ok "co.uk" *)
5858-5959- (* Get the registrable domain *)
6060- Publicsuffix.registrable_domain psl "www.example.com"
6161- (* Returns: Ok "example.com" *)
6262-6363- (* Check if a domain is a public suffix *)
6464- Publicsuffix.is_public_suffix psl "com"
6565- (* Returns: Ok true *)
6666-6767- Publicsuffix.is_public_suffix psl "example.com"
5656+ Publicsuffix.public_suffix psl "www.example.com" (* Returns: Ok "com" *)
5757+ Publicsuffix.public_suffix psl
5858+ "www.example.co.uk" (* Returns: Ok "co.uk" *)
5959+ (* Get the registrable domain *)
6060+ Publicsuffix.registrable_domain psl
6161+ "www.example.com" (* Returns: Ok "example.com" *)
6262+ (* Check if a domain is a public suffix *)
6363+ Publicsuffix.is_public_suffix psl "com" (* Returns: Ok true *)
6464+ Publicsuffix.is_public_suffix psl "example.com"
6865 (* Returns: Ok false *)
6966 ]}
70677168 {1 Internationalized Domain Names}
72697370 The library handles internationalized domain names (IDN) by converting them
7474- to Punycode (ASCII-compatible encoding) before lookup, following the IDNA2008
7575- protocol defined in {{:https://datatracker.ietf.org/doc/html/rfc5890}RFC 5890}
7676- (IDNA Definitions) and {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891}
7171+ to Punycode (ASCII-compatible encoding) before lookup, following the
7272+ IDNA2008 protocol defined in
7373+ {{:https://datatracker.ietf.org/doc/html/rfc5890}RFC 5890} (IDNA
7474+ Definitions) and {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891}
7775 (IDNA Protocol). The conversion is performed using [Punycode_idna.to_ascii].
78767979- Punycode encoding, specified in {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492},
8080- uniquely and reversibly transforms Unicode strings into ASCII-compatible
8181- strings using the "xn--" prefix (ACE prefix). See the [Punycode] library for
8282- the core encoding implementation. Both Unicode and Punycode input are accepted:
7777+ Punycode encoding, specified in
7878+ {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492}, uniquely and
7979+ reversibly transforms Unicode strings into ASCII-compatible strings using
8080+ the "xn--" prefix (ACE prefix). See the [Punycode] library for the core
8181+ encoding implementation. Both Unicode and Punycode input are accepted:
83828483 {[
8585- Publicsuffix.registrable_domain psl "www.食狮.com.cn"
8686- (* Returns: Ok "食狮.com.cn" *)
8787-8888- Publicsuffix.registrable_domain psl "www.xn--85x722f.com.cn"
8484+ Publicsuffix.registrable_domain psl
8585+ "www.食狮.com.cn" (* Returns: Ok "食狮.com.cn" *)
8686+ Publicsuffix.registrable_domain psl "www.xn--85x722f.com.cn"
8987 (* Returns: Ok "xn--85x722f.com.cn" *)
9088 ]}
9189···9593 names) are preserved in the output:
96949795 {[
9898- Publicsuffix.public_suffix psl "example.com"
9999- (* Returns: Ok "com" *)
100100-101101- Publicsuffix.public_suffix psl "example.com."
9696+ Publicsuffix.public_suffix psl "example.com" (* Returns: Ok "com" *)
9797+ Publicsuffix.public_suffix psl "example.com."
10298 (* Returns: Ok "com." *)
10399 ]}
104100···106102107103 This library implementation is based on the following specifications:
108104109109- {ul
110110- {- {{:https://publicsuffix.org/list/} Public Suffix List Specification} - The algorithm and list format}
111111- {- {{:https://datatracker.ietf.org/doc/html/rfc1034}RFC 1034} - Domain Names: Concepts and Facilities}
112112- {- {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035} - Domain Names: Implementation and Specification}
113113- {- {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492} - Punycode: A Bootstring encoding of Unicode for IDNA}
114114- {- {{:https://datatracker.ietf.org/doc/html/rfc5890}RFC 5890} - Internationalized Domain Names for Applications (IDNA): Definitions}
115115- {- {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891} - Internationalized Domain Names in Applications (IDNA): Protocol}}
105105+ - {{:https://publicsuffix.org/list/} Public Suffix List Specification} - The
106106+ algorithm and list format
107107+ - {{:https://datatracker.ietf.org/doc/html/rfc1034}RFC 1034} - Domain Names:
108108+ Concepts and Facilities
109109+ - {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035} - Domain Names:
110110+ Implementation and Specification
111111+ - {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492} - Punycode: A
112112+ Bootstring encoding of Unicode for IDNA
113113+ - {{:https://datatracker.ietf.org/doc/html/rfc5890}RFC 5890} -
114114+ Internationalized Domain Names for Applications (IDNA): Definitions
115115+ - {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891} -
116116+ Internationalized Domain Names in Applications (IDNA): Protocol
116117117118 {1 Related Libraries}
118119119119- {ul
120120- {- [Punycode] - Core Punycode encoding/decoding implementation}
121121- {- [Punycode_idna] - IDNA ToASCII/ToUnicode operations used for IDN conversion}}
122122-*)
120120+ - [Punycode] - Core Punycode encoding/decoding implementation
121121+ - [Punycode_idna] - IDNA ToASCII/ToUnicode operations used for IDN
122122+ conversion *)
123123124124(** {1 Types} *)
125125126126(** Section of the Public Suffix List where a rule originates *)
127127type section =
128128- | ICANN (** Domains delegated by ICANN or in the IANA root zone *)
128128+ | ICANN (** Domains delegated by ICANN or in the IANA root zone *)
129129 | Private (** Domains submitted by private parties *)
130130131131-(** A handle to the parsed Public Suffix List *)
132131type t
132132+(** A handle to the parsed Public Suffix List *)
133133134134(** {1 Errors} *)
135135136136(** Errors that can occur during PSL operations *)
137137type error =
138138- | Empty_domain
139139- (** The input domain was empty *)
138138+ | Empty_domain (** The input domain was empty *)
140139 | Invalid_domain of string
141141- (** The domain could not be parsed as a valid domain name.
142142- Domain names must conform to the syntax specified in
140140+ (** The domain could not be parsed as a valid domain name. Domain names
141141+ must conform to the syntax specified in
143142 {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}. *)
144143 | Leading_dot
145145- (** The domain has a leading dot (e.g., [.example.com]).
146146- Per {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035},
147147- domain names should not have leading dots. *)
144144+ (** The domain has a leading dot (e.g., [.example.com]). Per
145145+ {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}, domain
146146+ names should not have leading dots. *)
148147 | Punycode_error of string
149149- (** Failed to convert internationalized domain to Punycode encoding.
150150- The string contains the error message from [Punycode_idna].
151151- See {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492}
152152- for Punycode encoding requirements and
153153- {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891}
154154- for IDNA protocol requirements. *)
148148+ (** Failed to convert internationalized domain to Punycode encoding. The
149149+ string contains the error message from [Punycode_idna]. See
150150+ {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492} for
151151+ Punycode encoding requirements and
152152+ {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891} for IDNA
153153+ protocol requirements. *)
155154 | No_public_suffix
156156- (** The domain has no public suffix (should not happen with valid domains) *)
155155+ (** The domain has no public suffix (should not happen with valid domains)
156156+ *)
157157 | Domain_is_public_suffix
158158 (** The domain is itself a public suffix and has no registrable domain *)
159159160160-(** Pretty-print an error *)
161160val pp_error : Format.formatter -> error -> unit
161161+(** Pretty-print an error *)
162162163163+val error_to_string : error -> string
163164(** Convert an error to a human-readable string *)
164164-val error_to_string : error -> string
165165166166(** {1 Creation} *)
167167168168-(** Create a PSL instance using the embedded Public Suffix List data.
169169- The data is compiled into the library at build time. *)
170168val create : unit -> t
169169+(** Create a PSL instance using the embedded Public Suffix List data. The data
170170+ is compiled into the library at build time. *)
171171172172(** {1 Core Operations} *)
173173174174+val public_suffix : t -> string -> (string, error) result
174175(** [public_suffix t domain] returns the public suffix portion of [domain].
175176176177 The public suffix is determined by the PSL algorithm:
···180181181182 Domain names are processed according to
182183 {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035} syntax.
183183- Internationalized domain names (IDN) are automatically converted to
184184- Punycode per {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492}
185185- before matching.
184184+ Internationalized domain names (IDN) are automatically converted to Punycode
185185+ per {{:https://datatracker.ietf.org/doc/html/rfc3492}RFC 3492} before
186186+ matching.
186187187188 @param t The PSL instance
188189 @param domain The domain name to query (Unicode or Punycode)
···192193 - [public_suffix t "www.example.com"] returns [Ok "com"]
193194 - [public_suffix t "www.example.co.uk"] returns [Ok "co.uk"]
194195 - [public_suffix t "test.k12.ak.us"] returns [Ok "k12.ak.us"]
195195- - [public_suffix t "city.kobe.jp"] returns [Ok "jp"] (exception rule)
196196-*)
197197-val public_suffix : t -> string -> (string, error) result
196196+ - [public_suffix t "city.kobe.jp"] returns [Ok "jp"] (exception rule) *)
198197198198+val public_suffix_with_section : t -> string -> (string * section, error) result
199199(** [public_suffix_with_section t domain] is like {!public_suffix} but also
200200 returns the section (ICANN or Private) where the matching rule was found.
201201202202- If the implicit [*] rule was used (no explicit rule matched), the section
203203- is [ICANN].
202202+ If the implicit [*] rule was used (no explicit rule matched), the section is
203203+ [ICANN].
204204205205- @return [Ok (suffix, section)] or [Error e] on failure
206206-*)
207207-val public_suffix_with_section : t -> string -> (string * section, error) result
205205+ @return [Ok (suffix, section)] or [Error e] on failure *)
208206207207+val registrable_domain : t -> string -> (string, error) result
209208(** [registrable_domain t domain] returns the registrable domain portion.
210209211211- The registrable domain is the public suffix plus one additional label.
212212- This is the highest-level domain that can be registered by a user.
210210+ The registrable domain is the public suffix plus one additional label. This
211211+ is the highest-level domain that can be registered by a user.
213212214213 Domain labels follow the naming restrictions specified in
215215- {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}. Internationalized
216216- domain names are handled per {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891}.
214214+ {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}.
215215+ Internationalized domain names are handled per
216216+ {{:https://datatracker.ietf.org/doc/html/rfc5891}RFC 5891}.
217217218218 @param t The PSL instance
219219 @param domain The domain name to query
···225225 Examples:
226226 - [registrable_domain t "www.example.com"] returns [Ok "example.com"]
227227 - [registrable_domain t "example.com"] returns [Ok "example.com"]
228228- - [registrable_domain t "com"] returns [Error Domain_is_public_suffix]
229229-*)
230230-val registrable_domain : t -> string -> (string, error) result
228228+ - [registrable_domain t "com"] returns [Error Domain_is_public_suffix] *)
231229232232-(** [registrable_domain_with_section t domain] is like {!registrable_domain}
233233- but also returns the section where the matching rule was found.
230230+val registrable_domain_with_section :
231231+ t -> string -> (string * section, error) result
232232+(** [registrable_domain_with_section t domain] is like {!registrable_domain} but
233233+ also returns the section where the matching rule was found.
234234235235- @return [Ok (domain, section)] or [Error e] on failure
236236-*)
237237-val registrable_domain_with_section : t -> string -> (string * section, error) result
235235+ @return [Ok (domain, section)] or [Error e] on failure *)
238236239237(** {1 Predicates} *)
240238241241-(** [is_public_suffix t domain] returns [true] if [domain] is exactly a
242242- public suffix according to the PSL.
239239+val is_public_suffix : t -> string -> (bool, error) result
240240+(** [is_public_suffix t domain] returns [true] if [domain] is exactly a public
241241+ suffix according to the PSL.
243242244244- Note: This returns [true] if the domain matches a rule exactly, not if
245245- it's under a wildcard. For example:
243243+ Note: This returns [true] if the domain matches a rule exactly, not if it's
244244+ under a wildcard. For example:
246245 - [is_public_suffix t "com"] returns [Ok true]
247246 - [is_public_suffix t "example.com"] returns [Ok false]
248247 - [is_public_suffix t "foo.ck"] returns [Ok true] (due to [*.ck] rule)
249249- - [is_public_suffix t "www.ck"] returns [Ok false] (due to [!www.ck] exception)
250250-*)
251251-val is_public_suffix : t -> string -> (bool, error) result
248248+ - [is_public_suffix t "www.ck"] returns [Ok false] (due to [!www.ck]
249249+ exception) *)
252250253253-(** [is_registrable_domain t domain] returns [true] if [domain] is exactly
254254- a registrable domain (public suffix plus one label, no more).
251251+val is_registrable_domain : t -> string -> (bool, error) result
252252+(** [is_registrable_domain t domain] returns [true] if [domain] is exactly a
253253+ registrable domain (public suffix plus one label, no more).
255254256255 Examples:
257256 - [is_registrable_domain t "example.com"] returns [Ok true]
258257 - [is_registrable_domain t "www.example.com"] returns [Ok false]
259259- - [is_registrable_domain t "com"] returns [Ok false]
260260-*)
261261-val is_registrable_domain : t -> string -> (bool, error) result
258258+ - [is_registrable_domain t "com"] returns [Ok false] *)
262259263260(** {1 Statistics} *)
264261265265-(** Total number of rules in the embedded PSL *)
266262val rule_count : t -> int
263263+(** Total number of rules in the embedded PSL *)
267264268268-(** Number of ICANN section rules *)
269265val icann_rule_count : t -> int
266266+(** Number of ICANN section rules *)
270267271271-(** Number of private section rules *)
272268val private_rule_count : t -> int
269269+(** Number of private section rules *)
273270274271(** {1 Version Information} *)
275272273273+val version : t -> string
276274(** Version string from the embedded PSL data.
277275278276 Returns the version identifier from the Public Suffix List source file,
279279- typically in the format ["YYYY-MM-DD_HH-MM-SS_UTC"].
280280-*)
281281-val version : t -> string
277277+ typically in the format ["YYYY-MM-DD_HH-MM-SS_UTC"]. *)
282278279279+val commit : t -> string
283280(** Commit hash from the embedded PSL data.
284281285282 Returns the git commit hash from the Public Suffix List repository
286286- corresponding to the version of the data embedded in this library.
287287-*)
288288-val commit : t -> string
283283+ corresponding to the version of the data embedded in this library. *)
+52-65
lib/publicsuffix_data.mli
···11111212 {1 Public Suffix List Specification}
13131414- The Public Suffix List is maintained by Mozilla and follows the specification
1515- at {{:https://publicsuffix.org/list/} publicsuffix.org}. The list provides
1616- an accurate database of domain name suffixes under which Internet users can
1717- directly register names.
1414+ The Public Suffix List is maintained by Mozilla and follows the
1515+ specification at {{:https://publicsuffix.org/list/} publicsuffix.org}. The
1616+ list provides an accurate database of domain name suffixes under which
1717+ Internet users can directly register names.
18181919 {2 PSL Format and Rules}
20202121 The PSL defines three types of rules:
22222323- - {b Normal rules}: Standard domain suffixes (e.g., [com], [co.uk]).
2424- These match exactly as written.
2323+ - {b Normal rules}: Standard domain suffixes (e.g., [com], [co.uk]). These
2424+ match exactly as written.
25252626 - {b Wildcard rules}: Prefixed with [*.] (e.g., [*.jp]). These match any
2727 single label in that position. For example, [*.example.com] matches
2828 [foo.example.com] but not [example.com] or [bar.foo.example.com].
29293030 - {b Exception rules}: Prefixed with [!] (e.g., [!city.kobe.jp]). These
3131- override wildcard rules and specify that a particular domain {i is not}
3232- a public suffix despite a matching wildcard.
3131+ override wildcard rules and specify that a particular domain {i is not} a
3232+ public suffix despite a matching wildcard.
33333434 {2 Sections}
3535···48484949 Per the PSL specification, the matching algorithm:
50505151- 1. Matches the domain against all rules in the list
5252- 2. If no rules match, applies the implicit [*] wildcard rule
5353- 3. If multiple rules match, exception rules take priority
5454- 4. Otherwise, the rule with the most labels wins
5555- 5. For exception rules, the public suffix is derived by removing the
5656- exception's leftmost label
5151+ 1. Matches the domain against all rules in the list 2. If no rules match,
5252+ applies the implicit [*] wildcard rule 3. If multiple rules match, exception
5353+ rules take priority 4. Otherwise, the rule with the most labels wins 5. For
5454+ exception rules, the public suffix is derived by removing the exception's
5555+ leftmost label
57565857 {1 Data Structure}
59586059 This module represents the PSL as a trie (prefix tree) data structure for
6161- efficient lookup. The trie is constructed with labels in reverse order
6262- (TLD first), allowing efficient traversal from the top-level domain down
6363- to more specific labels.
6060+ efficient lookup. The trie is constructed with labels in reverse order (TLD
6161+ first), allowing efficient traversal from the top-level domain down to more
6262+ specific labels.
64636564 All domain labels in the trie are:
6665 - Converted to lowercase
···71707271 This module is automatically generated during the build process:
73727474- 1. The [gen_psl.ml] code generator reads [public_suffix_list.dat]
7575- 2. It parses each rule according to the PSL specification
7676- 3. It constructs an in-memory trie from all rules
7777- 4. It emits OCaml source code representing the trie
7878- 5. The generated code is compiled into the library
7373+ 1. The [gen_psl.ml] code generator reads [public_suffix_list.dat] 2. It
7474+ parses each rule according to the PSL specification 3. It constructs an
7575+ in-memory trie from all rules 4. It emits OCaml source code representing the
7676+ trie 5. The generated code is compiled into the library
79778080- This approach embeds the entire PSL into the compiled library, requiring
8181- no runtime file I/O or parsing.
7878+ This approach embeds the entire PSL into the compiled library, requiring no
7979+ runtime file I/O or parsing.
82808381 {1 Interface}
84828583 This module is internal to the library. The main library API is exposed
8686- through the {!Publicsuffix} module, which provides high-level functions
8787- for querying the PSL data.
8888-*)
8484+ through the {!Publicsuffix} module, which provides high-level functions for
8585+ querying the PSL data. *)
89869087(** {1 Types} *)
9188···93909491 The PSL is divided into two sections with different governance:
9592 - [ICANN]: Official domains delegated by ICANN or in the IANA root zone
9696- - [Private]: Domains submitted by private parties for their services
9797-*)
9393+ - [Private]: Domains submitted by private parties for their services *)
9894type section = ICANN | Private
999510096(** Rule types defined in the PSL specification.
1019710298 - [Normal]: A standard domain suffix that matches exactly (e.g., [com])
10399 - [Wildcard]: A rule with [*.] prefix that matches any single label
104104- - [Exception]: A rule with [!] prefix that overrides wildcard matches
105105-*)
100100+ - [Exception]: A rule with [!] prefix that overrides wildcard matches *)
106101type rule_type = Normal | Wildcard | Exception
107102103103+type trie_node = {
104104+ rule : (rule_type * section) option;
105105+ children : (string * trie_node) list;
106106+ wildcard_child : trie_node option;
107107+}
108108(** A node in the suffix trie.
109109110110- The trie is constructed with domain labels in reverse order (TLD first).
111111- For example, the domain [example.co.uk] would be traversed as [uk] -> [co] -> [example].
110110+ The trie is constructed with domain labels in reverse order (TLD first). For
111111+ example, the domain [example.co.uk] would be traversed as [uk] -> [co] ->
112112+ [example].
112113113114 - [rule]: If [Some (rt, sec)], this node represents a PSL rule of type [rt]
114115 from section [sec]
115116 - [children]: List of (label, child_node) pairs for exact label matches
116117 - [wildcard_child]: If [Some node], this represents a wildcard match ([*])
117117- at this position in the domain hierarchy
118118-*)
119119-type trie_node = {
120120- rule : (rule_type * section) option;
121121- children : (string * trie_node) list;
122122- wildcard_child : trie_node option;
123123-}
118118+ at this position in the domain hierarchy *)
124119125120(** {1 Data Access} *)
126121122122+val get_root : unit -> trie_node
127123(** Get the root of the suffix trie.
128124129125 The root node represents the starting point for all PSL lookups. Domain
130126 labels should be traversed in reverse order (TLD first) from this root.
131127132132- @return The root trie node containing all PSL rules
133133-*)
134134-val get_root : unit -> trie_node
128128+ @return The root trie node containing all PSL rules *)
135129136130(** {1 Statistics}
137131138132 These values reflect the PSL data at the time this module was generated.
139139- They include all rules from both the ICANN and Private sections.
140140-*)
133133+ They include all rules from both the ICANN and Private sections. *)
141134135135+val rule_count : int
142136(** Total number of rules in the embedded PSL data.
143137144144- This includes all Normal, Wildcard, and Exception rules from both
145145- sections.
138138+ This includes all Normal, Wildcard, and Exception rules from both sections.
146139*)
147147-val rule_count : int
148140141141+val icann_rule_count : int
149142(** Number of rules in the ICANN section.
150143151151- These are official TLD rules delegated by ICANN or present in the IANA
152152- root zone database.
153153-*)
154154-val icann_rule_count : int
144144+ These are official TLD rules delegated by ICANN or present in the IANA root
145145+ zone database. *)
155146147147+val private_rule_count : int
156148(** Number of rules in the Private section.
157149158158- These are rules submitted by private organizations for services that
159159- allow subdomain registration.
160160-*)
161161-val private_rule_count : int
150150+ These are rules submitted by private organizations for services that allow
151151+ subdomain registration. *)
162152163153(** {1 Version Information}
164154165165- These values reflect the version and commit information from the PSL
166166- data at the time this module was generated.
167167-*)
155155+ These values reflect the version and commit information from the PSL data at
156156+ the time this module was generated. *)
168157158158+val version : string
169159(** Version string from the PSL data file.
170160171161 This is the version identifier from the Public Suffix List source file,
172172- typically in the format "YYYY-MM-DD_HH-MM-SS_UTC".
173173-*)
174174-val version : string
162162+ typically in the format "YYYY-MM-DD_HH-MM-SS_UTC". *)
175163164164+val commit : string
176165(** Commit hash from the PSL data file.
177166178167 This is the git commit hash from the Public Suffix List repository
179179- corresponding to the version of the data embedded in this library.
180180-*)
181181-val commit : string
168168+ corresponding to the version of the data embedded in this library. *)
+13-11
test/psl_test.ml
···1515*)
16161717let psl = Publicsuffix.create ()
1818-1918let print_error e = Printf.printf "ERROR: %s\n" (Publicsuffix.error_to_string e)
2020-2121-let print_result = function
2222- | Ok s -> print_endline s
2323- | Error e -> print_error e
1919+let print_result = function Ok s -> print_endline s | Error e -> print_error e
24202521let print_bool_result = function
2622 | Ok b -> print_endline (string_of_bool b)
···28242925let print_result_with_section = function
3026 | Ok (s, sec) ->
3131- let sec_str = match sec with
2727+ let sec_str =
2828+ match sec with
3229 | Publicsuffix.ICANN -> "ICANN"
3330 | Publicsuffix.Private -> "PRIVATE"
3431 in
···4138 print_endline "Commands:";
4239 print_endline " registrable <domain> - Get registrable domain";
4340 print_endline " suffix <domain> - Get public suffix";
4444- print_endline " is_suffix <domain> - Check if domain is a public suffix";
4545- print_endline " is_registrable <domain> - Check if domain is a registrable domain";
4646- print_endline " registrable_section <domain> - Get registrable domain with section";
4141+ print_endline
4242+ " is_suffix <domain> - Check if domain is a public suffix";
4343+ print_endline
4444+ " is_registrable <domain> - Check if domain is a registrable domain";
4545+ print_endline
4646+ " registrable_section <domain> - Get registrable domain with section";
4747 print_endline " suffix_section <domain> - Get public suffix with section";
4848 print_endline " stats - Print rule statistics";
4949 exit 1
···5858 | "is_registrable" when Array.length Sys.argv >= 3 ->
5959 print_bool_result (Publicsuffix.is_registrable_domain psl Sys.argv.(2))
6060 | "registrable_section" when Array.length Sys.argv >= 3 ->
6161- print_result_with_section (Publicsuffix.registrable_domain_with_section psl Sys.argv.(2))
6161+ print_result_with_section
6262+ (Publicsuffix.registrable_domain_with_section psl Sys.argv.(2))
6263 | "suffix_section" when Array.length Sys.argv >= 3 ->
6363- print_result_with_section (Publicsuffix.public_suffix_with_section psl Sys.argv.(2))
6464+ print_result_with_section
6565+ (Publicsuffix.public_suffix_with_section psl Sys.argv.(2))
6466 | "stats" ->
6567 Printf.printf "Total rules: %d\n" (Publicsuffix.rule_count psl);
6668 Printf.printf "ICANN rules: %d\n" (Publicsuffix.icann_rule_count psl);