···12121313(* Helper utilities *)
14141515+(** Character predicates *)
1616+1517let is_whitespace = function
1618 | ' ' | '\t' | '\n' | '\r' | '\012' (* FF *) -> true
1719 | _ -> false
18201921let is_ascii_digit = function '0' .. '9' -> true | _ -> false
20222323+let is_lower_alpha = function 'a' .. 'z' -> true | _ -> false
2424+2525+let is_upper_alpha = function 'A' .. 'Z' -> true | _ -> false
2626+2727+let is_alpha c = is_lower_alpha c || is_upper_alpha c
2828+2929+let is_alphanumeric c = is_alpha c || is_ascii_digit c
3030+3131+let is_hex_digit = function
3232+ | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
3333+ | _ -> false
3434+3535+(** Case conversion *)
3636+2137let to_ascii_lowercase c =
2238 match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c
23392440let string_to_ascii_lowercase s =
2541 String.map to_ascii_lowercase s
4242+4343+(** String predicates *)
4444+4545+let is_non_empty s = String.trim s <> ""
4646+4747+let is_all_digits s = String.length s > 0 && String.for_all is_ascii_digit s
4848+4949+let is_all_alpha s = String.length s > 0 && String.for_all is_alpha s
5050+5151+let is_all_alphanumeric s = String.length s > 0 && String.for_all is_alphanumeric s
26522753let trim_html_spaces s =
2854 let len = String.length s in
+36-1
lib/check/datatype/datatype.mli
···2727(** Check if a value is valid *)
2828val is_valid : t -> string -> bool
29293030-(** Helper utilities for implementing datatype validators. *)
3030+(** {1 Helper utilities for implementing datatype validators} *)
3131+3232+(** {2 Character predicates} *)
31333234(** Check if a character is HTML5 whitespace (space, tab, LF, FF, or CR). *)
3335val is_whitespace : char -> bool
···3537(** Check if a character is an ASCII digit (0-9). *)
3638val is_ascii_digit : char -> bool
37394040+(** Check if a character is a lowercase ASCII letter (a-z). *)
4141+val is_lower_alpha : char -> bool
4242+4343+(** Check if a character is an uppercase ASCII letter (A-Z). *)
4444+val is_upper_alpha : char -> bool
4545+4646+(** Check if a character is an ASCII letter (a-z or A-Z). *)
4747+val is_alpha : char -> bool
4848+4949+(** Check if a character is an ASCII letter or digit. *)
5050+val is_alphanumeric : char -> bool
5151+5252+(** Check if a character is a hexadecimal digit (0-9, a-f, A-F). *)
5353+val is_hex_digit : char -> bool
5454+5555+(** {2 Case conversion} *)
5656+3857(** Convert an ASCII character to lowercase. *)
3958val to_ascii_lowercase : char -> char
40594160(** Convert an ASCII string to lowercase. *)
4261val string_to_ascii_lowercase : string -> string
6262+6363+(** {2 String predicates} *)
6464+6565+(** Check if a string has non-whitespace content after trimming. *)
6666+val is_non_empty : string -> bool
6767+6868+(** Check if all characters in a non-empty string are ASCII digits. *)
6969+val is_all_digits : string -> bool
7070+7171+(** Check if all characters in a non-empty string are ASCII letters. *)
7272+val is_all_alpha : string -> bool
7373+7474+(** Check if all characters in a non-empty string are ASCII letters or digits. *)
7575+val is_all_alphanumeric : string -> bool
7676+7777+(** {2 String manipulation} *)
43784479(** Trim HTML5 whitespace from both ends of a string. *)
4580val trim_html_spaces : string -> string
+6-8
lib/check/datatype/dt_autocomplete.ml
···44let is_whitespace = Datatype.is_whitespace
55let to_ascii_lowercase = Datatype.to_ascii_lowercase
6677+(* Use Astring for string operations *)
88+let is_prefix = Astring.String.is_prefix
99+710(** Trim whitespace from string and collapse internal whitespace *)
811let trim_whitespace s =
912 let s = String.trim s in
···104107(** Split string on whitespace - uses shared utility *)
105108let split_on_whitespace = Datatype.split_on_whitespace
106109107107-(** Check if string starts with prefix *)
108108-let starts_with s prefix =
109109- String.length s >= String.length prefix
110110- && String.sub s 0 (String.length prefix) = prefix
111111-112110(** Validate detail tokens *)
113111let check_tokens tokens =
114112 let tokens = ref tokens in
···116114117115 (* Check for section-* *)
118116 (match !tokens with
119119- | token :: rest when starts_with token "section-" ->
117117+ | token :: rest when is_prefix ~affix:"section-" token ->
120118 tokens := rest
121119 | _ -> ());
122120···145143146144 (* Check if any token in the list is a section-* indicator *)
147145 let find_section tokens =
148148- List.find_opt (fun t -> starts_with t "section-") tokens
146146+ List.find_opt (fun t -> is_prefix ~affix:"section-" t) tokens
149147 in
150148151149 (* Check if webauthn appears anywhere except as the very last token *)
···207205 (Printf.sprintf
208206 "The token \"%s\" must only appear before any autofill field names."
209207 token)
210210- | token :: _ when starts_with token "section-" ->
208208+ | token :: _ when is_prefix ~affix:"section-" token ->
211209 Error
212210 "A \"section-*\" indicator must only appear as the first token in a \
213211 list of autofill detail tokens."
+3-6
lib/check/datatype/dt_charset.ml
···11(** Helper functions for charset validation *)
2233let is_valid_charset_char c =
44- (c >= '0' && c <= '9') ||
55- (c >= 'a' && c <= 'z') ||
66- (c >= 'A' && c <= 'Z') ||
44+ Datatype.is_alphanumeric c ||
75 c = '-' || c = '!' || c = '#' || c = '$' || c = '%' || c = '&' ||
86 c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' ||
97 c = '~' || c = '^'
1081111-let to_lower s = String.lowercase_ascii s
99+let to_lower = Datatype.string_to_ascii_lowercase
12101311(** Common encoding labels recognized by WHATWG Encoding Standard.
1412 This is a subset of the full list. *)
···7472module Meta_charset = struct
7573 let name = "legacy character encoding declaration"
76747777- let is_whitespace c =
7878- c = ' ' || c = '\t' || c = '\n' || c = '\012' || c = '\r'
7575+ let is_whitespace = Datatype.is_whitespace
79768077 let validate s =
8178 let lower = to_lower s in
+1-2
lib/check/datatype/dt_color.ml
···154154 ]
155155156156(** Check if character is hex digit *)
157157-let is_hex_digit c =
158158- (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
157157+let is_hex_digit = Datatype.is_hex_digit
159158160159(** Validate hex color (#RGB or #RRGGBB) *)
161160let validate_hex_color s =
+1-4
lib/check/datatype/dt_datetime.ml
···11(** Helper functions for datetime validation *)
2233-let is_digit c = c >= '0' && c <= '9'
44-55-let is_all_digits s =
66- String.for_all is_digit s
33+let is_all_digits = Datatype.is_all_digits
7485let parse_int s =
96 try Some (int_of_string s)
+1-3
lib/check/datatype/dt_email.ml
···2233(** Helper to check if a character is valid in email local/domain parts *)
44let is_email_char c =
55- (c >= 'a' && c <= 'z')
66- || (c >= 'A' && c <= 'Z')
77- || (c >= '0' && c <= '9')
55+ Datatype.is_alphanumeric c
86 || c = '.' || c = '-' || c = '_' || c = '+' || c = '='
97108(** Validate a single email address using simplified rules *)
+4-17
lib/check/datatype/dt_language.ml
···2233let q = Error_code.q
4455-let is_lower_alpha c = c >= 'a' && c <= 'z'
66-let is_upper_alpha c = c >= 'A' && c <= 'Z'
77-let is_alpha c = is_lower_alpha c || is_upper_alpha c
88-let is_digit c = c >= '0' && c <= '9'
99-let is_alphanumeric c = is_alpha c || is_digit c
1010-1111-let is_all_alpha s =
1212- String.for_all is_alpha s
1313-1414-let _is_all_digits s =
1515- String.for_all is_digit s
1616-1717-let is_all_alphanumeric s =
1818- String.for_all is_alphanumeric s
1919-2020-let to_lower s =
2121- String.lowercase_ascii s
55+(* Use shared character predicates from Datatype *)
66+let is_all_alpha = Datatype.is_all_alpha
77+let is_all_alphanumeric = Datatype.is_all_alphanumeric
88+let to_lower = Datatype.string_to_ascii_lowercase
2292310(** Valid extlang subtags per IANA language-subtag-registry.
2411 Extlangs are 3-letter subtags that follow the primary language.
+20-26
lib/check/datatype/dt_media_query.ml
···7070(** Media query keywords (unused but kept for documentation) *)
7171let _media_keywords = [ "and"; "not"; "only" ]
72727373-(** Check if character is whitespace *)
7474-let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
7373+let is_whitespace = Datatype.is_whitespace
75747675(** Check if character can start an identifier *)
7776let is_ident_start c =
7878- (c >= 'a' && c <= 'z')
7979- || (c >= 'A' && c <= 'Z')
8080- || c = '_' || c = '-' || Char.code c >= 128
7777+ Datatype.is_alpha c || c = '_' || c = '-' || Char.code c >= 128
81788279(** Check if character can be in an identifier *)
8380let is_ident_char c =
8484- is_ident_start c || (c >= '0' && c <= '9')
8181+ is_ident_start c || Datatype.is_ascii_digit c
85828686-(** Unicode case-fold for Turkish dotted-I etc *)
8787-let lowercase_unicode s =
8888- (* Handle special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> i *)
8383+(** Unicode case folding for case-insensitive comparison.
8484+8585+ Uses the Uucp library for proper Unicode case folding, which handles
8686+ special cases like Turkish dotted-I (U+0130 -> 'i' + U+0307) correctly. *)
8787+let case_fold s =
8988 let buf = Buffer.create (String.length s) in
9090- let i = ref 0 in
9191- while !i < String.length s do
9292- let c = s.[!i] in
9393- if c = '\xc4' && !i + 1 < String.length s && s.[!i + 1] = '\xb0' then begin
9494- (* U+0130 -> 'i' + U+0307 (combining dot above), but for simplicity just 'i' followed by U+0307 *)
9595- Buffer.add_string buf "i\xcc\x87";
9696- i := !i + 2
9797- end else begin
9898- Buffer.add_char buf (Char.lowercase_ascii c);
9999- incr i
100100- end
101101- done;
8989+ let add_uchar u = Uutf.Buffer.add_utf_8 buf u in
9090+ let fold_char () _pos = function
9191+ | `Malformed _ -> () (* Skip malformed sequences *)
9292+ | `Uchar u ->
9393+ match Uucp.Case.Fold.fold u with
9494+ | `Self -> add_uchar u
9595+ | `Uchars us -> List.iter add_uchar us
9696+ in
9797+ Uutf.String.fold_utf_8 fold_char () s;
10298 Buffer.contents buf
10399104100(** Check balanced parentheses *)
···222218 match read_ident () with
223219 | None -> Error "Parse Error."
224220 | Some media_type ->
225225- let mt_lower = lowercase_unicode media_type in
221221+ let mt_lower = case_fold media_type in
226222 (* Check for deprecated media type *)
227223 if List.mem mt_lower deprecated_media_types then
228224 Error (Printf.sprintf "The media \"%s\" has been deprecated" mt_lower)
···341337 if List.mem feature length_features then begin
342338 (* Must be a valid length: number followed by unit *)
343339 let value = String.trim value in
344344- let is_digit c = c >= '0' && c <= '9' in
345340346341 (* Parse number - includes sign, digits, and decimal point *)
347342 let i = ref 0 in
348343 let len = String.length value in
349349- while !i < len && (is_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
344344+ while !i < len && (Datatype.is_ascii_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
350345 incr i
351346 done;
352347 let num_part = String.sub value 0 !i in
···377372 end else if List.mem feature color_features then begin
378373 (* Must be an integer *)
379374 let value = String.trim value in
380380- let is_digit c = c >= '0' && c <= '9' in
381381- if String.length value > 0 && String.for_all is_digit value then Ok ()
375375+ if Datatype.is_all_digits value then Ok ()
382376 else
383377 Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
384378 end else
+1-2
lib/check/datatype/dt_mime.ml
···11(** MIME type validation based on RFC 2045 and HTML5 spec *)
2233-(** Check if character is whitespace *)
44-let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
33+let is_whitespace = Datatype.is_whitespace
5465(** Check if character is a token character (RFC 2045) *)
76let is_token_char c =
···11-(** H1 element counter - warns about multiple h1 elements in a document. *)
22-33-type state = {
44- mutable h1_count : int;
55- mutable svg_depth : int; (* Track depth inside SVG *)
66-}
77-88-let create () = {
99- h1_count = 0;
1010- svg_depth = 0;
1111-}
1212-1313-let reset state =
1414- state.h1_count <- 0;
1515- state.svg_depth <- 0
1616-1717-let start_element state ~element collector =
1818- (* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *)
1919- match element.Element.tag with
2020- | Tag.Svg _ ->
2121- state.svg_depth <- state.svg_depth + 1
2222- | Tag.Html `H1 when state.svg_depth = 0 ->
2323- state.h1_count <- state.h1_count + 1;
2424- if state.h1_count > 1 then
2525- Message_collector.add_typed collector (`Misc `Multiple_h1)
2626- | Tag.Html _ when state.svg_depth = 0 ->
2727- () (* Other HTML elements outside SVG *)
2828- | _ ->
2929- () (* Non-HTML or inside SVG *)
3030-3131-let end_element state ~tag _collector =
3232- match tag with
3333- | Tag.Svg _ when state.svg_depth > 0 ->
3434- state.svg_depth <- state.svg_depth - 1
3535- | _ -> ()
3636-3737-let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-16
lib/check/specialized/h1_checker.mli
···11-(** H1 element counter checker.
22-33- This checker validates that documents don't have multiple h1 elements,
44- which can confuse document structure and accessibility tools.
55-66- {2 Validation Rules}
77-88- - Documents should have at most one [<h1>] element
99- - [<h1>] elements inside SVG content (foreignObject, desc) are not counted
1010-1111- {2 Error Messages}
1212-1313- - [Multiple_h1]: Document contains more than one h1 element *)
1414-1515-val checker : Checker.t
1616-(** The H1 checker instance. *)
+19-107
lib/check/specialized/heading_checker.ml
···11(** Heading structure validation checker.
2233 This checker validates that:
44- - Heading levels don't skip (e.g., h1 to h3)
55- - Documents have at least one heading
66- - Multiple h1 usage is noted
77- - Headings are not empty *)
44+ - Multiple h1 usage is reported as an error
55+66+ Note: Additional accessibility checks (first heading should be h1, skipped
77+ levels, empty headings) are intentionally not included as errors since they
88+ are recommendations rather than HTML5 spec requirements. *)
89910(** Checker state tracking heading structure. *)
1011type state = {
1111- mutable current_level : int option;
1212 mutable h1_count : int;
1313- mutable has_any_heading : bool;
1414- mutable first_heading_checked : bool;
1515- mutable in_heading : Tag.html_tag option;
1616- mutable heading_has_text : bool;
1713 mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *)
1814}
19152020-let create () =
2121- {
2222- current_level = None;
2323- h1_count = 0;
2424- has_any_heading = false;
2525- first_heading_checked = false;
2626- in_heading = None;
2727- heading_has_text = false;
2828- svg_depth = 0;
2929- }
1616+let create () = {
1717+ h1_count = 0;
1818+ svg_depth = 0;
1919+}
30203121let reset state =
3232- state.current_level <- None;
3322 state.h1_count <- 0;
3434- state.has_any_heading <- false;
3535- state.first_heading_checked <- false;
3636- state.in_heading <- None;
3737- state.heading_has_text <- false;
3823 state.svg_depth <- 0
39244040-(** Check if text is effectively empty (only whitespace). *)
4141-let is_empty_text text =
4242- let rec check i =
4343- if i >= String.length text then
4444- true
4545- else
4646- match text.[i] with
4747- | ' ' | '\t' | '\n' | '\r' -> check (i + 1)
4848- | _ -> false
4949- in
5050- check 0
5151-5225let start_element state ~element collector =
5326 match element.Element.tag with
5427 | Tag.Svg _ ->
5555- (* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *)
2828+ (* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *)
5629 state.svg_depth <- state.svg_depth + 1
5757- | Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 ->
5858- let level = match Tag.heading_level h with Some l -> l | None -> 0 in
5959- let name = Tag.html_tag_to_string h in
6060- state.has_any_heading <- true;
6161-6262- (* Check if this is the first heading *)
6363- if not state.first_heading_checked then begin
6464- state.first_heading_checked <- true;
6565- if level <> 1 then
6666- Message_collector.add_typed collector
6767- (`Generic (Printf.sprintf
6868- "First heading in document is <%s>, should typically be <h1>" name))
6969- end;
7070-7171- (* Track h1 count *)
7272- if level = 1 then begin
7373- state.h1_count <- state.h1_count + 1;
7474- if state.h1_count > 1 then
7575- Message_collector.add_typed collector (`Misc `Multiple_h1)
7676- end;
7777-7878- (* Check for skipped levels *)
7979- begin match state.current_level with
8080- | None ->
8181- state.current_level <- Some level
8282- | Some prev_level ->
8383- let diff = level - prev_level in
8484- if diff > 1 then
8585- Message_collector.add_typed collector
8686- (`Generic (Printf.sprintf
8787- "Heading level skipped: <%s> follows <h%d>, skipping %d level%s. This can confuse screen reader users"
8888- name prev_level (diff - 1) (if diff > 2 then "s" else "")));
8989- state.current_level <- Some level
9090- end;
9191-9292- (* Track that we're in a heading to check for empty content *)
9393- state.in_heading <- Some h;
9494- state.heading_has_text <- false
3030+ | Tag.Html `H1 when state.svg_depth = 0 ->
3131+ state.h1_count <- state.h1_count + 1;
3232+ if state.h1_count > 1 then
3333+ Message_collector.add_typed collector (`Misc `Multiple_h1)
9534 | _ -> ()
96359797-let end_element state ~tag collector =
9898- (* Track SVG depth *)
9999- (match tag with
100100- | Tag.Svg _ when state.svg_depth > 0 ->
101101- state.svg_depth <- state.svg_depth - 1
102102- | _ -> ());
103103- (* Check for empty headings *)
104104- match state.in_heading, tag with
105105- | Some h, Tag.Html h2 when h = h2 ->
106106- if not state.heading_has_text then
107107- Message_collector.add_typed collector
108108- (`Generic (Printf.sprintf
109109- "Heading <%s> is empty or contains only whitespace. Empty headings are problematic for screen readers"
110110- (Tag.html_tag_to_string h)));
111111- state.in_heading <- None;
112112- state.heading_has_text <- false
3636+let end_element state ~tag _collector =
3737+ match tag with
3838+ | Tag.Svg _ when state.svg_depth > 0 ->
3939+ state.svg_depth <- state.svg_depth - 1
11340 | _ -> ()
11441115115-let characters state text _collector =
116116- (* If we're inside a heading, check if this text is non-whitespace *)
117117- match state.in_heading with
118118- | Some _ ->
119119- if not (is_empty_text text) then
120120- state.heading_has_text <- true
121121- | None ->
122122- ()
123123-124124-let end_document state collector =
125125- if not state.has_any_heading then
126126- Message_collector.add_typed collector
127127- (`Generic "Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility")
128128-129129-let checker = Checker.make ~create ~reset ~start_element ~end_element
130130- ~characters ~end_document ()
4242+let checker = Checker.make ~create ~reset ~start_element ~end_element ()