···11-(** Interactive element nesting checker implementation. *)
11+(** Interactive element nesting checker implementation.
2233-(** Special ancestors that need tracking for nesting validation.
33+ Uses bool arrays instead of bitmasks for JavaScript compatibility
44+ (JS bitwise ops are limited to 32 bits). *)
4555- This array defines the elements whose presence in the ancestor chain
66- affects validation of descendant elements. The order is significant
77- as it determines bit positions in the ancestor bitmask. *)
66+(** Special ancestors that need tracking for nesting validation. *)
87let special_ancestors =
98 [| "a"; "address"; "body"; "button"; "caption"; "dfn"; "dt"; "figcaption";
109 "figure"; "footer"; "form"; "header"; "label"; "map"; "noscript"; "th";
···1312 "s"; "small"; "mark"; "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp";
1413 "kbd"; "var" |]
15141616-(** Hashtable for O(1) lookup of special ancestor bit positions *)
1515+let num_ancestors = Array.length special_ancestors
1616+1717+(** Hashtable for O(1) lookup of special ancestor indices *)
1718let special_ancestor_table : (string, int) Hashtbl.t =
1819 let tbl = Hashtbl.create 64 in
1920 Array.iteri (fun i name -> Hashtbl.add tbl name i) special_ancestors;
2021 tbl
21222222-(** Get the bit position for a special ancestor element.
2323- Returns [-1] if the element is not a special ancestor. O(1) lookup. *)
2424-let special_ancestor_number name =
2323+(** Get the index for a special ancestor element.
2424+ Returns [-1] if the element is not a special ancestor. *)
2525+let special_ancestor_index name =
2526 match Hashtbl.find_opt special_ancestor_table name with
2627 | Some i -> i
2728 | None -> -1
···3132 [| "a"; "button"; "details"; "embed"; "iframe"; "label"; "select";
3233 "textarea" |]
33343434-(** Map from descendant element name to bitmask of prohibited ancestors. *)
3535-let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
3535+(** Create an empty bool array for ancestor tracking *)
3636+let empty_flags () = Array.make num_ancestors false
3737+3838+(** Copy a bool array *)
3939+let copy_flags flags = Array.copy flags
4040+4141+(** Map from descendant element name to prohibited ancestor flags. *)
4242+let prohibited_ancestors_by_descendant : (string, bool array) Hashtbl.t =
3643 Hashtbl.create 64
37443838-(** Map from descendant element name to bitmask of ancestors that cause content model violations.
3939- (These use different error messages than nesting violations.) *)
4040-let content_model_violation_mask : (string, int) Hashtbl.t =
4545+(** Map from descendant element name to content model violation flags. *)
4646+let content_model_violations : (string, bool array) Hashtbl.t =
4147 Hashtbl.create 64
42484949+(** Get or create prohibited ancestors array for a descendant *)
5050+let get_prohibited descendant =
5151+ match Hashtbl.find_opt prohibited_ancestors_by_descendant descendant with
5252+ | Some arr -> arr
5353+ | None ->
5454+ let arr = empty_flags () in
5555+ Hashtbl.replace prohibited_ancestors_by_descendant descendant arr;
5656+ arr
5757+5858+(** Get or create content model violations array for a descendant *)
5959+let get_content_model_violations descendant =
6060+ match Hashtbl.find_opt content_model_violations descendant with
6161+ | Some arr -> arr
6262+ | None ->
6363+ let arr = empty_flags () in
6464+ Hashtbl.replace content_model_violations descendant arr;
6565+ arr
6666+4367(** Register that [ancestor] is prohibited for [descendant]. *)
4468let register_prohibited_ancestor ancestor descendant =
4545- let number = special_ancestor_number ancestor in
4646- if number = -1 then
6969+ let idx = special_ancestor_index ancestor in
7070+ if idx = -1 then
4771 failwith ("Ancestor not found in array: " ^ ancestor);
4848- let mask =
4949- match Hashtbl.find_opt ancestor_mask_by_descendant descendant with
5050- | None -> 0
5151- | Some m -> m
5252- in
5353- let new_mask = mask lor (1 lsl number) in
5454- Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
7272+ let arr = get_prohibited descendant in
7373+ arr.(idx) <- true
55745675(** Register a content model violation (phrasing-only element containing flow content). *)
5776let register_content_model_violation ancestor descendant =
5877 register_prohibited_ancestor ancestor descendant;
5959- let number = special_ancestor_number ancestor in
6060- let mask =
6161- match Hashtbl.find_opt content_model_violation_mask descendant with
6262- | None -> 0
6363- | Some m -> m
6464- in
6565- let new_mask = mask lor (1 lsl number) in
6666- Hashtbl.replace content_model_violation_mask descendant new_mask
7878+ let idx = special_ancestor_index ancestor in
7979+ let arr = get_content_model_violations descendant in
8080+ arr.(idx) <- true
67816882(** Initialize the prohibited ancestor map. *)
6983let () =
···133147 ) interactive_elements;
134148135149 (* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
136136- (* These are content model violations, not nesting violations. *)
137150 let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
138151 "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
139152 let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···145158 ) flow_content
146159 ) phrasing_only
147160148148-(** Bitmask constants for common checks. *)
149149-let a_button_mask =
150150- let a_num = special_ancestor_number "a" in
151151- let button_num = special_ancestor_number "button" in
152152- (1 lsl a_num) lor (1 lsl button_num)
153153-154154-let map_mask =
155155- let map_num = special_ancestor_number "map" in
156156- 1 lsl map_num
161161+(** Indices for common checks *)
162162+let a_index = special_ancestor_index "a"
163163+let button_index = special_ancestor_index "button"
164164+let map_index = special_ancestor_index "map"
157165158158-(** Transparent elements - inherit content model from parent. O(1) hashtable lookup. *)
166166+(** Transparent elements - inherit content model from parent. *)
159167let transparent_elements_tbl =
160168 Attr_utils.hashtbl_of_list ["a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map"]
161169···163171164172(** Stack node representing an element's context. *)
165173type stack_node = {
166166- ancestor_mask : int;
174174+ ancestor_flags : bool array;
167175 name : string;
168176 is_transparent : bool;
169177}
···171179(** Checker state. *)
172180type state = {
173181 mutable stack : stack_node list;
174174- mutable ancestor_mask : int;
182182+ mutable ancestor_flags : bool array;
175183}
176184177185let create () =
178178- { stack = []; ancestor_mask = 0 }
186186+ { stack = []; ancestor_flags = empty_flags () }
179187180188let reset state =
181189 state.stack <- [];
182182- state.ancestor_mask <- 0
190190+ state.ancestor_flags <- empty_flags ()
183191184192(** Get attribute value by name from attribute list. *)
185193let get_attr attrs name =
···192200(** Check if element is interactive based on its attributes. *)
193201let is_interactive_element name attrs =
194202 match name with
195195- | "a" ->
196196- has_attr attrs "href"
197197- | "audio" | "video" ->
198198- has_attr attrs "controls"
199199- | "img" | "object" ->
200200- has_attr attrs "usemap"
203203+ | "a" -> has_attr attrs "href"
204204+ | "audio" | "video" -> has_attr attrs "controls"
205205+ | "img" | "object" -> has_attr attrs "usemap"
201206 | "input" ->
202202- begin match get_attr attrs "type" with
203203- | Some "hidden" -> false
204204- | _ -> true
205205- end
207207+ (match get_attr attrs "type" with
208208+ | Some "hidden" -> false
209209+ | _ -> true)
206210 | "button" | "details" | "embed" | "iframe" | "label" | "select"
207207- | "textarea" ->
208208- true
209209- | _ ->
210210- false
211211+ | "textarea" -> true
212212+ | _ -> false
211213212212-(** Find the nearest transparent element in the ancestor stack, if any.
213213- Returns the immediate parent's name if it's transparent, otherwise None. *)
214214+(** Find the nearest transparent element in the ancestor stack. *)
214215let find_nearest_transparent_parent state =
215216 match state.stack with
216217 | parent :: _ when parent.is_transparent -> Some parent.name
···218219219220(** Report nesting violations. *)
220221let check_nesting state name attrs collector =
221221- (* Compute the prohibited ancestor mask for this element *)
222222- let base_mask =
223223- match Hashtbl.find_opt ancestor_mask_by_descendant name with
224224- | Some m -> m
225225- | None -> 0
222222+ (* Get prohibited ancestors for this element *)
223223+ let prohibited =
224224+ match Hashtbl.find_opt prohibited_ancestors_by_descendant name with
225225+ | Some arr -> arr
226226+ | None -> empty_flags ()
226227 in
227228228228- (* Get content model violation mask for this element *)
229229- let content_model_mask =
230230- match Hashtbl.find_opt content_model_violation_mask name with
231231- | Some m -> m
232232- | None -> 0
229229+ (* Get content model violations for this element *)
230230+ let content_violations =
231231+ match Hashtbl.find_opt content_model_violations name with
232232+ | Some arr -> arr
233233+ | None -> empty_flags ()
233234 in
234235235235- (* Add interactive element restrictions if applicable *)
236236- let mask =
237237- if is_interactive_element name attrs then
238238- base_mask lor a_button_mask
239239- else
240240- base_mask
236236+ (* Check if element is interactive (adds a/button restrictions) *)
237237+ let is_interactive = is_interactive_element name attrs in
238238+239239+ (* Determine attribute to mention in error messages *)
240240+ let attr =
241241+ match name with
242242+ | "a" when has_attr attrs "href" -> Some "href"
243243+ | "audio" when has_attr attrs "controls" -> Some "controls"
244244+ | "video" when has_attr attrs "controls" -> Some "controls"
245245+ | "img" when has_attr attrs "usemap" -> Some "usemap"
246246+ | "object" when has_attr attrs "usemap" -> Some "usemap"
247247+ | _ -> None
241248 in
242249243243- (* Check for violations *)
244244- if mask <> 0 then begin
245245- let mask_hit = state.ancestor_mask land mask in
246246- if mask_hit <> 0 then begin
247247- (* Determine if element has a special attribute to mention *)
248248- let attr =
249249- match name with
250250- | "a" when has_attr attrs "href" -> Some "href"
251251- | "audio" when has_attr attrs "controls" -> Some "controls"
252252- | "video" when has_attr attrs "controls" -> Some "controls"
253253- | "img" when has_attr attrs "usemap" -> Some "usemap"
254254- | "object" when has_attr attrs "usemap" -> Some "usemap"
255255- | _ -> None
250250+ (* Find transparent parent if any *)
251251+ let transparent_parent = find_nearest_transparent_parent state in
252252+253253+ (* Check each special ancestor *)
254254+ Array.iteri (fun i ancestor ->
255255+ (* Is this ancestor in our current ancestor chain? *)
256256+ if state.ancestor_flags.(i) then begin
257257+ (* Is this ancestor prohibited for this element? *)
258258+ let is_prohibited =
259259+ prohibited.(i) ||
260260+ (is_interactive && (i = a_index || i = button_index))
256261 in
257257- (* Find the transparent parent (like canvas) if any *)
258258- let transparent_parent = find_nearest_transparent_parent state in
259259- (* Find which ancestors are violated *)
260260- Array.iteri (fun i ancestor ->
261261- let bit = 1 lsl i in
262262- if (mask_hit land bit) <> 0 then begin
263263- (* Check if this is a content model violation or a nesting violation *)
264264- if (content_model_mask land bit) <> 0 then begin
265265- (* Content model violation: use "not allowed as child" format *)
266266- (* If there's a transparent parent, use that instead of the ancestor *)
267267- let parent = match transparent_parent with
268268- | Some p -> p
269269- | None -> ancestor
270270- in
271271- Message_collector.add_typed collector
272272- (`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
273273- end else
274274- (* Nesting violation: use "must not be descendant" format *)
275275- Message_collector.add_typed collector
276276- (`Element (`Must_not_descend (`Elem name, `Attr attr, `Ancestor ancestor)))
277277- end
278278- ) special_ancestors
262262+ if is_prohibited then begin
263263+ (* Is this a content model violation or a nesting violation? *)
264264+ if content_violations.(i) then begin
265265+ (* Content model violation: use "not allowed as child" format *)
266266+ let parent = match transparent_parent with
267267+ | Some p -> p
268268+ | None -> ancestor
269269+ in
270270+ Message_collector.add_typed collector
271271+ (`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
272272+ end else
273273+ (* Nesting violation: use "must not be descendant" format *)
274274+ Message_collector.add_typed collector
275275+ (`Element (`Must_not_descend (`Elem name, `Attr attr, `Ancestor ancestor)))
276276+ end
279277 end
280280- end
278278+ ) special_ancestors
281279282280(** Check for required ancestors. *)
283281let check_required_ancestors state name collector =
284282 match name with
285283 | "area" ->
286286- if (state.ancestor_mask land map_mask) = 0 then
284284+ if not state.ancestor_flags.(map_index) then
287285 Message_collector.add_typed collector
288286 (`Generic (Printf.sprintf "The %s element must have a %s ancestor."
289287 (Error_code.q "area") (Error_code.q "map")))
290288 | _ -> ()
291289292292-(** Check for metadata-only elements appearing outside valid contexts.
293293- style element is only valid in head or in noscript (in head). *)
290290+(** Check for metadata-only elements appearing outside valid contexts. *)
294291let check_metadata_element_context state name collector =
295292 match name with
296293 | "style" ->
297297- (* style is only valid inside head or noscript *)
298298- begin match state.stack with
299299- | parent :: _ when parent.name = "head" -> () (* valid *)
300300- | parent :: _ when parent.name = "noscript" -> () (* valid in noscript in head *)
301301- | parent :: _ ->
302302- (* style inside any other element is not allowed *)
303303- Message_collector.add_typed collector
304304- (`Element (`Not_allowed_as_child (`Child "style", `Parent parent.name)))
305305- | [] -> () (* at root level, would be caught elsewhere *)
306306- end
294294+ (match state.stack with
295295+ | parent :: _ when parent.name = "head" -> ()
296296+ | parent :: _ when parent.name = "noscript" -> ()
297297+ | parent :: _ ->
298298+ Message_collector.add_typed collector
299299+ (`Element (`Not_allowed_as_child (`Child "style", `Parent parent.name)))
300300+ | [] -> ())
307301 | _ -> ()
308302309303let start_element state ~element collector =
310310- (* Only check HTML elements, not SVG or MathML *)
311304 match element.Element.tag with
312305 | Tag.Html _ ->
313306 let name = Tag.tag_to_string element.tag in
314307 let attrs = element.raw_attrs in
308308+315309 (* Check for nesting violations *)
316310 check_nesting state name attrs collector;
317311 check_required_ancestors state name collector;
318312 check_metadata_element_context state name collector;
319313320320- (* Update ancestor mask if this is a special ancestor *)
321321- let new_mask = state.ancestor_mask in
322322- let number = special_ancestor_number name in
323323- let new_mask =
324324- if number >= 0 then
325325- new_mask lor (1 lsl number)
326326- else
327327- new_mask
328328- in
314314+ (* Create new flags, copying current state *)
315315+ let new_flags = copy_flags state.ancestor_flags in
329316330330- (* Add href tracking for <a> elements *)
331331- let new_mask =
332332- if name = "a" && has_attr attrs "href" then
333333- let a_num = special_ancestor_number "a" in
334334- new_mask lor (1 lsl a_num)
335335- else
336336- new_mask
337337- in
317317+ (* Set flag if this is a special ancestor *)
318318+ let idx = special_ancestor_index name in
319319+ if idx >= 0 then
320320+ new_flags.(idx) <- true;
338321339339- (* Push onto stack *)
322322+ (* Push onto stack (save old flags) *)
340323 let is_transparent = is_transparent_element name in
341341- let node = { ancestor_mask = state.ancestor_mask; name; is_transparent } in
324324+ let node = { ancestor_flags = state.ancestor_flags; name; is_transparent } in
342325 state.stack <- node :: state.stack;
343343- state.ancestor_mask <- new_mask
344344- | _ -> () (* SVG, MathML, Custom, Unknown *)
326326+ state.ancestor_flags <- new_flags
327327+ | _ -> ()
345328346329let end_element state ~tag _collector =
347347- (* Only track HTML elements *)
348330 match tag with
349331 | Tag.Html _ ->
350350- (* Pop from stack and restore ancestor mask *)
351351- begin match state.stack with
352352- | [] -> () (* Should not happen in well-formed documents *)
353353- | node :: rest ->
354354- state.stack <- rest;
355355- state.ancestor_mask <- node.ancestor_mask
356356- end
332332+ (match state.stack with
333333+ | [] -> ()
334334+ | node :: rest ->
335335+ state.stack <- rest;
336336+ state.ancestor_flags <- node.ancestor_flags)
357337 | _ -> ()
358338359359-(** Create the checker as a first-class module. *)
360339let checker = Checker.make ~create ~reset ~start_element ~end_element ()