digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: 0 ms";
2 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 0 ms<br>number of output rows: 1"];
}
3 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 3<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>1 ms (0 ms, 0 ms, 0 ms (stage 967.0: task 931))<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 3<br>local bytes read: 171.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 3<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>48.0 B (16.0 B, 16.0 B, 16.0 B (stage 967.0: task 931))<br>local merged bytes read: 0.0 B<br>number of partitions: 1<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>171.0 B (56.0 B, 56.0 B, 59.0 B (stage 967.0: task 932))"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n25.8 s (4 ms, 4 ms, 25.8 s (stage 967.0: task 932))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>25.8 s (3 ms, 3 ms, 25.8 s (stage 967.0: task 932))<br>number of output rows: 3"];
6 [labelType="html" label="<br><b>Project</b><br><br>"];
}
7 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n25.8 s (18 ms, 18 ms, 25.8 s (stage 967.0: task 932))";
9 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 737"];
}
10 [labelType="html" label="<br><b>Project</b><br><br>"];
11 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n25.9 s (28 ms, 28 ms, 25.8 s (stage 967.0: task 932))";
13 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 9,522<br>number of input batches: 3"];
}
14 [labelType="html" label="<b>Scan parquet </b><br><br>number of files read: 1<br>scan time total (min, med, max (stageId: taskId))<br>206 ms (3 ms, 3 ms, 200 ms (stage 967.0: task 932))<br>metadata time: 0 ms<br>size of files read: 9.9 MiB<br>number of output rows: 9,522"];
2->0;
3->2;
5->3;
6->5;
7->6;
9->7;
10->9;
11->10;
13->11;
14->13;
}
15
AdaptiveSparkPlan isFinalPlan=true
HashAggregate(keys=[], functions=[count(1)])
WholeStageCodegen (4)
Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=14050]
HashAggregate(keys=[], functions=[partial_count(1)])
Project
WholeStageCodegen (3)
Filter (get_json_object(COL_9F4C7B82_8EA5_42B7_8724_EDC3D750C2D3#89374, $.term_number_in_text) <=> 1)
Generate explode(COL_9E6B0BF3_343E_49F2_87DB_EE022165520A#89354), false, [COL_9F4C7B82_8EA5_42B7_8724_EDC3D750C2D3#89374]
WholeStageCodegen (2)
Project [from_json(ArrayType(StringType,false), to_json(str_to_words(str_replace_regex(str_replace_regex(BODY_3253#89006, <br\s*\/?>, ), <[^<>]+>, )), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_9E6B0BF3_343E_49F2_87DB_EE022165520A#89354]
Filter (((DOCUMENT_ID_3241#89009 <=> 8BE75A8015FDF0D67EA8C8C6A4008D75E25BAEEF) AND (size(from_json(ArrayType(StringType,false), to_json(str_to_words(str_replace_regex(str_replace_regex(BODY_3253#89006, <br\s*\/?>, ), <[^<>]+>, )), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0)) AND isnotnull(from_json(ArrayType(StringType,false), to_json(str_to_words(str_replace_regex(str_replace_regex(BODY_3253#89006, <br\s*\/?>, ), <[^<>]+>, )), Some(Etc/UTC)), Some(Etc/UTC))))
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet [BODY_3253#89006,DOCUMENT_ID_3241#89009] Batched: true, DataFilters: [(DOCUMENT_ID_3241#89009 <=> 8BE75A8015FDF0D67EA8C8C6A4008D75E25BAEEF), (size(from_json(ArrayType..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/data/output/cache/parquet/uet/DOCUMENT_3240], PartitionFilters: [], PushedFilters: [EqualNullSafe(DOCUMENT_ID_3241,8BE75A8015FDF0D67EA8C8C6A4008D75E25BAEEF)], ReadSchema: struct<BODY_3253:string,DOCUMENT_ID_3241:string>