digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
1 [labelType="html" label="<br><b>HashAggregate</b><br><br>"];
2 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 2<br>shuffle write time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 1018.0: task 1001))<br>data size total (min, med, max (stageId: taskId))<br>32.0 B (0.0 B, 16.0 B, 16.0 B (stage 1018.0: task 1001))<br>number of partitions: 1<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>112.0 B (0.0 B, 56.0 B, 56.0 B (stage 1018.0: task 1001))"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n5 ms (0 ms, 2 ms, 3 ms (stage 1018.0: task 999))";
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>4 ms (0 ms, 2 ms, 2 ms (stage 1018.0: task 1001))<br>number of output rows: 2"];
5 [labelType="html" label="<br><b>Project</b><br><br>"];
}
6 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 84,871"];
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n6 ms (0 ms, 3 ms, 3 ms (stage 1018.0: task 1001))";
8 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 824,667"];
}
9 [labelType="html" label="<br><b>Project</b><br><br>"];
10 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1,965"];
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n22 ms (0 ms, 11 ms, 11 ms (stage 1018.0: task 1001))";
12 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 4,096<br>number of input batches: 1"];
}
13 [labelType="html" label="<b>Scan parquet </b><br><br>number of files read: 1<br>scan time total (min, med, max (stageId: taskId))<br>204 ms (2 ms, 2 ms, 200 ms (stage 1018.0: task 1000))<br>metadata time: 0 ms<br>size of files read: 9.9 MiB<br>number of output rows: 4,096"];
1->0;
2->1;
4->2;
5->4;
6->5;
8->6;
9->8;
10->9;
12->10;
13->12;
}
14
AdaptiveSparkPlan isFinalPlan=false
HashAggregate(keys=[], functions=[count(1)])
Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15327]
HashAggregate(keys=[], functions=[partial_count(1)])
Project
WholeStageCodegen (3)
Filter (str_to_integer(get_json_object(COL_9F4C7B82_8EA5_42B7_8724_EDC3D750C2D3#101628, $.term_number_in_sentence)) <=> 1)
Generate explode(COL_9E6B0BF3_343E_49F2_87DB_EE022165520A#101608), false, [COL_9F4C7B82_8EA5_42B7_8724_EDC3D750C2D3#101628]
WholeStageCodegen (2)
Project [from_json(ArrayType(StringType,false), to_json(str_to_words(str_replace_regex(str_replace_regex(BODY_3253#101260, <br\s*\/?>, ), <[^<>]+>, )), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_9E6B0BF3_343E_49F2_87DB_EE022165520A#101608]
Filter ((size(from_json(ArrayType(StringType,false), to_json(str_to_words(str_replace_regex(str_replace_regex(BODY_3253#101260, <br\s*\/?>, ), <[^<>]+>, )), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(str_to_words(str_replace_regex(str_replace_regex(BODY_3253#101260, <br\s*\/?>, ), <[^<>]+>, )), Some(Etc/UTC)), Some(Etc/UTC))))
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet [BODY_3253#101260] Batched: true, DataFilters: [(size(from_json(ArrayType(StringType,false), to_json(str_to_words(str_replace_regex(str_replace_..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/data/output/cache/parquet/uet/DOCUMENT_3240], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<BODY_3253:string>