tangled
alpha
login
or
join now
anil.recoil.org
/
ocaml-html5rw
1
fork
atom
OCaml HTML5 parser/serialiser based on Python's JustHTML
1
fork
atom
overview
issues
pulls
pipelines
fixes
anil.recoil.org
3 months ago
3d9bebb1
24a4e057
+25
-77
2 changed files
expand all
collapse all
unified
split
test
test_serializer.ml
test_tokenizer.ml
+12
-45
test/test_serializer.ml
···
180
180
181
181
type token_info = {
182
182
token : token_type option;
183
183
-
node : Dom.node option; (* Legacy for compatibility *)
184
184
-
tag_name : string option;
185
185
-
is_empty_tag : bool;
186
183
}
187
184
188
185
let build_token_info token =
189
186
let arr = json_array token in
190
187
match arr with
191
191
-
| [] -> { token = None; node = None; tag_name = None; is_empty_tag = false }
188
188
+
| [] -> { token = None }
192
189
| type_json :: rest ->
193
190
let token_type_str = json_string type_json in
194
191
match token_type_str, rest with
195
192
| "StartTag", [_ns_json; name_json; attrs_json] ->
196
193
let name = json_string name_json in
197
194
let attrs = parse_attrs attrs_json in
198
198
-
{ token = Some (StartTag (name, attrs));
199
199
-
node = Some (Dom.create_element name ~attrs ());
200
200
-
tag_name = Some name;
201
201
-
is_empty_tag = false }
195
195
+
{ token = Some (StartTag (name, attrs)) }
202
196
203
197
| "StartTag", [name_json; attrs_json] ->
204
198
let name = json_string name_json in
205
199
let attrs = parse_attrs attrs_json in
206
206
-
{ token = Some (StartTag (name, attrs));
207
207
-
node = Some (Dom.create_element name ~attrs ());
208
208
-
tag_name = Some name;
209
209
-
is_empty_tag = false }
200
200
+
{ token = Some (StartTag (name, attrs)) }
210
201
211
202
| "EmptyTag", [name_json; attrs_json] ->
212
203
let name = json_string name_json in
213
204
let attrs = parse_attrs attrs_json in
214
214
-
{ token = Some (EmptyTag (name, attrs));
215
215
-
node = Some (Dom.create_element name ~attrs ());
216
216
-
tag_name = Some name;
217
217
-
is_empty_tag = true }
205
205
+
{ token = Some (EmptyTag (name, attrs)) }
218
206
219
207
| "EndTag", [_ns_json; name_json] ->
220
208
let name = json_string name_json in
221
221
-
{ token = Some (EndTag name);
222
222
-
node = None;
223
223
-
tag_name = Some name;
224
224
-
is_empty_tag = false }
209
209
+
{ token = Some (EndTag name) }
225
210
226
211
| "EndTag", [name_json] ->
227
212
let name = json_string name_json in
228
228
-
{ token = Some (EndTag name);
229
229
-
node = None;
230
230
-
tag_name = Some name;
231
231
-
is_empty_tag = false }
213
213
+
{ token = Some (EndTag name) }
232
214
233
215
| "Characters", [text_json] ->
234
216
let text = json_string text_json in
235
235
-
{ token = Some (TextNode text);
236
236
-
node = Some (Dom.create_text text);
237
237
-
tag_name = None;
238
238
-
is_empty_tag = false }
217
217
+
{ token = Some (TextNode text) }
239
218
240
219
| "Comment", [text_json] ->
241
220
let text = json_string text_json in
242
242
-
{ token = Some (CommentNode text);
243
243
-
node = Some (Dom.create_comment text);
244
244
-
tag_name = None;
245
245
-
is_empty_tag = false }
221
221
+
{ token = Some (CommentNode text) }
246
222
247
223
| "Doctype", [name_json] ->
248
224
let name = json_string name_json in
249
225
let node = Dom.create_doctype ~name () in
250
250
-
{ token = Some (DoctypeNode node);
251
251
-
node = Some node;
252
252
-
tag_name = None;
253
253
-
is_empty_tag = false }
226
226
+
{ token = Some (DoctypeNode node) }
254
227
255
228
| "Doctype", [name_json; public_json] ->
256
229
let name = json_string name_json in
···
259
232
| Some pub -> Dom.create_doctype ~name ~public_id:pub ()
260
233
| None -> Dom.create_doctype ~name ()
261
234
in
262
262
-
{ token = Some (DoctypeNode node);
263
263
-
node = Some node;
264
264
-
tag_name = None;
265
265
-
is_empty_tag = false }
235
235
+
{ token = Some (DoctypeNode node) }
266
236
267
237
| "Doctype", [name_json; public_json; system_json] ->
268
238
let name = json_string name_json in
···
274
244
| None, Some sys -> Dom.create_doctype ~name ~system_id:sys ()
275
245
| None, None -> Dom.create_doctype ~name ()
276
246
in
277
277
-
{ token = Some (DoctypeNode node);
278
278
-
node = Some node;
279
279
-
tag_name = None;
280
280
-
is_empty_tag = false }
247
247
+
{ token = Some (DoctypeNode node) }
281
248
282
282
-
| _ -> { token = None; node = None; tag_name = None; is_empty_tag = false }
249
249
+
| _ -> { token = None }
283
250
284
251
(* Serialize a single node to HTML with options *)
285
252
let escape_text text =
+13
-32
test/test_tokenizer.ml
···
22
22
end
23
23
24
24
(* Test case representation *)
25
25
-
type test_error = {
26
26
-
code : string;
27
27
-
line : int;
28
28
-
col : int;
29
29
-
}
30
30
-
31
25
type test_case = {
32
26
description : string;
33
27
input : string;
34
28
output : Jsont.json list;
35
35
-
errors : test_error list;
29
29
+
expected_error_count : int;
36
30
initial_states : string list;
37
31
last_start_tag : string option;
38
32
double_escaped : bool;
···
87
81
| Jsont.Bool (b, _) -> b
88
82
| _ -> failwith "Expected bool"
89
83
90
90
-
let json_int = function
91
91
-
| Jsont.Number (n, _) -> int_of_float n
92
92
-
| _ -> failwith "Expected number"
93
93
-
94
84
let json_array = function
95
85
| Jsont.Array (arr, _) -> arr
96
86
| _ -> failwith "Expected array"
···
109
99
| Some v -> v
110
100
| None -> failwith ("Missing member: " ^ name)
111
101
112
112
-
(* Parse test error from JSON *)
113
113
-
let parse_test_error json =
114
114
-
let obj = json_object json in
115
115
-
{
116
116
-
code = json_string (json_mem_exn "code" obj);
117
117
-
line = json_int (json_mem_exn "line" obj);
118
118
-
col = json_int (json_mem_exn "col" obj);
119
119
-
}
120
120
-
121
102
(* Parse a single test case from JSON *)
122
103
let parse_test_case ~xml_mode json =
123
104
let obj = json_object json in
124
105
let description = json_string (json_mem_exn "description" obj) in
125
106
let input = json_string (json_mem_exn "input" obj) in
126
107
let output = json_array (json_mem_exn "output" obj) in
127
127
-
let errors = match json_mem "errors" obj with
128
128
-
| Some e -> List.map parse_test_error (json_array e)
129
129
-
| None -> []
108
108
+
let expected_error_count = match json_mem "errors" obj with
109
109
+
| Some e -> List.length (json_array e)
110
110
+
| None -> 0
130
111
in
131
112
let initial_states = match json_mem "initialStates" obj with
132
113
| Some s -> List.map json_string (json_array s)
···
140
121
| Some b -> json_bool b
141
122
| None -> false
142
123
in
143
143
-
{ description; input; output; errors; initial_states; last_start_tag; double_escaped; xml_mode }
124
124
+
{ description; input; output; expected_error_count; initial_states; last_start_tag; double_escaped; xml_mode }
144
125
145
126
(* Convert state name to State.t *)
146
127
let state_of_string = function
···
272
253
List.for_all2 json_equal actual_tokens expected
273
254
in
274
255
275
275
-
let actual_errors = Tokenizer.get_errors tokenizer in
276
276
-
let errors_count_match = List.length actual_errors = List.length test.errors in
256
256
+
let actual_error_count = List.length (Tokenizer.get_errors tokenizer) in
257
257
+
let errors_count_match = actual_error_count = test.expected_error_count in
277
258
278
278
-
(tokens_match && errors_count_match, actual_tokens, expected, actual_errors, test.errors)
259
259
+
(tokens_match && errors_count_match, actual_tokens, expected, actual_error_count, test.expected_error_count)
279
260
280
261
(* Format JSON for display *)
281
262
let rec json_to_string = function
···
331
312
List.iter (fun state_name ->
332
313
try
333
314
let state = state_of_string state_name in
334
334
-
let (success, actual, expected, actual_errors, expected_errors) = run_test test state in
315
315
+
let (success, actual, expected, actual_err_count, expected_err_count) = run_test test state in
335
316
336
317
if success then
337
318
incr passed
338
319
else begin
339
320
incr failed;
340
321
if List.length !first_failures < 3 then
341
341
-
first_failures := (i + 1, test.description, state_name, actual, expected, actual_errors, expected_errors) :: !first_failures
322
322
+
first_failures := (i + 1, test.description, state_name, actual, expected, actual_err_count, expected_err_count) :: !first_failures
342
323
end
343
324
with e ->
344
325
incr failed;
345
326
if List.length !first_failures < 3 then
346
346
-
first_failures := (i + 1, test.description, state_name, [], [], [], []) :: !first_failures;
327
327
+
first_failures := (i + 1, test.description, state_name, [], [], 0, 0) :: !first_failures;
347
328
Printf.eprintf "Exception in test %d (%s): %s\n" (i + 1) test.description (Printexc.to_string e)
348
329
) test.initial_states
349
330
) all_tests;
···
375
356
if !all_failures <> [] then begin
376
357
Printf.printf "\n=== First failures ===\n";
377
358
List.iter (fun (filename, failures) ->
378
378
-
List.iter (fun (test_num, desc, state, actual, expected, actual_errs, expected_errs) ->
359
359
+
List.iter (fun (test_num, desc, state, actual, expected, actual_err_count, expected_err_count) ->
379
360
Printf.printf "\n--- %s test %d (%s) in %s ---\n" filename test_num state desc;
380
361
Printf.printf "Expected tokens: [%s]\n" (String.concat "; " (List.map json_to_string expected));
381
362
Printf.printf "Actual tokens: [%s]\n" (String.concat "; " (List.map json_to_string actual));
382
382
-
Printf.printf "Expected %d errors, got %d\n" (List.length expected_errs) (List.length actual_errs)
363
363
+
Printf.printf "Expected %d errors, got %d\n" expected_err_count actual_err_count
383
364
) failures
384
365
) (List.rev !all_failures)
385
366
end;