digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n6 ms (3 ms, 3 ms, 3 ms (stage 396.0: task 382))";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1,505"];
4 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 1,505"];
5 [labelType="html" label="<br><b>Project</b><br><br>"];
6 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 17"];
7 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 4,096<br>number of input batches: 1"];
}
8 [labelType="html" label="<b>Scan parquet </b><br><br>number of files read: 1<br>scan time total (min, med, max (stageId: taskId))<br>157 ms (3 ms, 3 ms, 151 ms (stage 397.0: task 383))<br>metadata time: 0 ms<br>size of files read: 9.4 MiB<br>number of output rows: 4,096"];
2->0;
3->2;
4->3;
5->4;
6->5;
7->6;
8->7;
}
9
CollectLimit 1500
Project [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939, str_replace_regex(COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939, /<[^>]+>/g, ) AS COL_6CB7024F_EB60_4614_9A19_1F6A990D529D#34087]
Filter isnotnull(COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939)
Generate explode(COL_CBB80C89_C2E2_4CAE_9E96_1384D3814850#33919), [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918], false, [COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939]
Project [DOCUMENT_ID_3241#33673 AS COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, METADOCUMENT_ID_3242#33677 AS COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, PATIENT_ID_3243#33679 AS COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, VISIT_ID_3244#33683 AS COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, VISIT_UNIT_ID_3245#33684 AS COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, UPDATED_AT_3246#33682 AS COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, UNIT_3247#33681 AS COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, DELEGATE_UNIT_3248#33672 AS COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, HIERARCHY_UNIT_3249#33676 AS COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, TITLE_3250#33680 AS COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, DOCUMENT_TYPE_3251#33674 AS COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, CREATED_AT_3252#33671 AS COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, BODY_3253#33670 AS COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, AUTHOR_3254#33669 AS COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, PATIENT_AGE_3255#33678 AS COL_46900168_36DE_4076_8267_A418814B0376#33811, FORMAT_3256#33675 AS COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, str_replace_regex(BODY_3253#33670, <p[^>]*>(.*?)<\/p>, ) AS COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, str_extract_by_regex(str_replace_extend(BODY_3253#33670, [</p>], [</p>.]), <p[^>]*>(.*?)<\/p>) AS COL_CBB80C89_C2E2_4CAE_9E96_1384D3814850#33919]
Filter ((size(str_extract_by_regex(str_replace_extend(BODY_3253#33670, [</p>], [</p>.]), <p[^>]*>(.*?)<\/p>), true) > 0) AND isnotnull(str_extract_by_regex(str_replace_extend(BODY_3253#33670, [</p>], [</p>.]), <p[^>]*>(.*?)<\/p>)))
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet [AUTHOR_3254#33669,BODY_3253#33670,CREATED_AT_3252#33671,DELEGATE_UNIT_3248#33672,DOCUMENT_ID_3241#33673,DOCUMENT_TYPE_3251#33674,FORMAT_3256#33675,HIERARCHY_UNIT_3249#33676,METADOCUMENT_ID_3242#33677,PATIENT_AGE_3255#33678,PATIENT_ID_3243#33679,TITLE_3250#33680,UNIT_3247#33681,UPDATED_AT_3246#33682,VISIT_ID_3244#33683,VISIT_UNIT_ID_3245#33684] Batched: true, DataFilters: [(size(str_extract_by_regex(str_replace_extend(BODY_3253#33670, [</p>], [</p>.]), <p[^>]*>(.*?)<\..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/data/output/cache/parquet/uet/DOCUMENT_3240], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<AUTHOR_3254:string,BODY_3253:string,CREATED_AT_3252:timestamp,DELEGATE_UNIT_3248:string,DO...
== Physical Plan ==
CollectLimit (8)
+- * Project (7)
+- * Filter (6)
+- * Generate (5)
+- * Project (4)
+- * Filter (3)
+- * ColumnarToRow (2)
+- Scan parquet (1)
(1) Scan parquet
Output [16]: [AUTHOR_3254#33669, BODY_3253#33670, CREATED_AT_3252#33671, DELEGATE_UNIT_3248#33672, DOCUMENT_ID_3241#33673, DOCUMENT_TYPE_3251#33674, FORMAT_3256#33675, HIERARCHY_UNIT_3249#33676, METADOCUMENT_ID_3242#33677, PATIENT_AGE_3255#33678, PATIENT_ID_3243#33679, TITLE_3250#33680, UNIT_3247#33681, UPDATED_AT_3246#33682, VISIT_ID_3244#33683, VISIT_UNIT_ID_3245#33684]
Batched: true
Location: InMemoryFileIndex [file:/data/output/cache/parquet/uet/DOCUMENT_3240]
ReadSchema: struct<AUTHOR_3254:string,BODY_3253:string,CREATED_AT_3252:timestamp,DELEGATE_UNIT_3248:string,DOCUMENT_ID_3241:string,DOCUMENT_TYPE_3251:string,FORMAT_3256:string,HIERARCHY_UNIT_3249:string,METADOCUMENT_ID_3242:string,PATIENT_AGE_3255:int,PATIENT_ID_3243:string,TITLE_3250:string,UNIT_3247:string,UPDATED_AT_3246:timestamp,VISIT_ID_3244:string,VISIT_UNIT_ID_3245:string>
(2) ColumnarToRow [codegen id : 1]
Input [16]: [AUTHOR_3254#33669, BODY_3253#33670, CREATED_AT_3252#33671, DELEGATE_UNIT_3248#33672, DOCUMENT_ID_3241#33673, DOCUMENT_TYPE_3251#33674, FORMAT_3256#33675, HIERARCHY_UNIT_3249#33676, METADOCUMENT_ID_3242#33677, PATIENT_AGE_3255#33678, PATIENT_ID_3243#33679, TITLE_3250#33680, UNIT_3247#33681, UPDATED_AT_3246#33682, VISIT_ID_3244#33683, VISIT_UNIT_ID_3245#33684]
(3) Filter [codegen id : 1]
Input [16]: [AUTHOR_3254#33669, BODY_3253#33670, CREATED_AT_3252#33671, DELEGATE_UNIT_3248#33672, DOCUMENT_ID_3241#33673, DOCUMENT_TYPE_3251#33674, FORMAT_3256#33675, HIERARCHY_UNIT_3249#33676, METADOCUMENT_ID_3242#33677, PATIENT_AGE_3255#33678, PATIENT_ID_3243#33679, TITLE_3250#33680, UNIT_3247#33681, UPDATED_AT_3246#33682, VISIT_ID_3244#33683, VISIT_UNIT_ID_3245#33684]
Condition : ((size(str_extract_by_regex(str_replace_extend(BODY_3253#33670, [</p>], [</p>.]), <p[^>]*>(.*?)<\/p>), true) > 0) AND isnotnull(str_extract_by_regex(str_replace_extend(BODY_3253#33670, [</p>], [</p>.]), <p[^>]*>(.*?)<\/p>)))
(4) Project [codegen id : 1]
Output [18]: [DOCUMENT_ID_3241#33673 AS COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, METADOCUMENT_ID_3242#33677 AS COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, PATIENT_ID_3243#33679 AS COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, VISIT_ID_3244#33683 AS COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, VISIT_UNIT_ID_3245#33684 AS COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, UPDATED_AT_3246#33682 AS COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, UNIT_3247#33681 AS COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, DELEGATE_UNIT_3248#33672 AS COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, HIERARCHY_UNIT_3249#33676 AS COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, TITLE_3250#33680 AS COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, DOCUMENT_TYPE_3251#33674 AS COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, CREATED_AT_3252#33671 AS COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, BODY_3253#33670 AS COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, AUTHOR_3254#33669 AS COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, PATIENT_AGE_3255#33678 AS COL_46900168_36DE_4076_8267_A418814B0376#33811, FORMAT_3256#33675 AS COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, str_replace_regex(BODY_3253#33670, <p[^>]*>(.*?)<\/p>, ) AS COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, str_extract_by_regex(str_replace_extend(BODY_3253#33670, [</p>], [</p>.]), <p[^>]*>(.*?)<\/p>) AS COL_CBB80C89_C2E2_4CAE_9E96_1384D3814850#33919]
Input [16]: [AUTHOR_3254#33669, BODY_3253#33670, CREATED_AT_3252#33671, DELEGATE_UNIT_3248#33672, DOCUMENT_ID_3241#33673, DOCUMENT_TYPE_3251#33674, FORMAT_3256#33675, HIERARCHY_UNIT_3249#33676, METADOCUMENT_ID_3242#33677, PATIENT_AGE_3255#33678, PATIENT_ID_3243#33679, TITLE_3250#33680, UNIT_3247#33681, UPDATED_AT_3246#33682, VISIT_ID_3244#33683, VISIT_UNIT_ID_3245#33684]
(5) Generate [codegen id : 1]
Input [18]: [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, COL_CBB80C89_C2E2_4CAE_9E96_1384D3814850#33919]
Arguments: explode(COL_CBB80C89_C2E2_4CAE_9E96_1384D3814850#33919), [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918], false, [COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939]
(6) Filter [codegen id : 1]
Input [18]: [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939]
Condition : isnotnull(COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939)
(7) Project [codegen id : 1]
Output [19]: [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939, str_replace_regex(COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939, /<[^>]+>/g, ) AS COL_6CB7024F_EB60_4614_9A19_1F6A990D529D#34087]
Input [18]: [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939]
(8) CollectLimit
Input [19]: [COL_B8C4154A_9C1F_4379_B2D0_37CB2E2E8163#33797, COL_E64A43C0_92EB_4340_A5E8_C7C27526AB67#33798, COL_93CB3020_C35B_4048_83FD_5071BBD0FA14#33799, COL_7C0A6459_9870_4E69_A193_D12DCD19B4D8#33800, COL_8FADC0CC_2246_48BF_96B6_37B8BA1E1C0D#33801, COL_8A9046D2_1D72_45E3_B655_D46E004711DC#33802, COL_A3AD19B8_0E44_4535_990D_3E89D0346083#33803, COL_AE628ECA_E5BA_4F07_8939_98F393782191#33804, COL_BB50D21B_057C_4291_B5BF_6CE5E982EC4F#33805, COL_70DA6BEE_221D_4064_99F0_A228C6EA03D1#33806, COL_A687F590_7636_41E9_B432_F96E84FA5822#33807, COL_B885B9B6_22DC_43A3_A9BA_7E5273012413#33808, COL_B4024986_B57D_48FD_A06D_AA53298E7CAF#33809, COL_2B0D0A0B_E8F4_4E84_AD0F_67F5167E0998#33810, COL_46900168_36DE_4076_8267_A418814B0376#33811, COL_E95AE405_48D5_4107_B391_1BC4DED5B99A#33812, COL_AECD41D8_DB05_475D_9E5C_621A9BFFB95E#33918, COL_A504A8EC_44DA_47A8_BC01_8F3D6A00E326#33939, COL_6CB7024F_EB60_4614_9A19_1F6A990D529D#34087]
Arguments: 1500