<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <!--
      Channel-level metadata.
      The link, description, pubDate, dc:creator and dc:date below carry the
      same values as the "Re:" reply item (URL .../m-p/61611#M926) that appears
      later in this channel.
      The description is an HTML fragment whose markup is entity-escaped, so an
      XML parser sees it as plain text. "&amp;nbsp;" has to stay escaped this
      way because nbsp is an HTML entity that XML does not predefine.
      pubDate (RFC 822 style) and dc:date (ISO 8601) express the same instant.
    -->
    <title>topic Re: Show distinct column values in pyspark dataframe in Governance, Risk, Compliance</title>
    <link>https://community.isc2.org/t5/Governance-Risk-Compliance/Show-distinct-column-values-in-pyspark-dataframe/m-p/61611#M926</link>
    <description>&lt;P&gt;&lt;a href="https://community.isc2.org/t5/user/viewprofilepage/user-id/379800957"&gt;@ursyathi&lt;/a&gt;&amp;nbsp;&amp;nbsp; Have you checked Google?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://stackoverflow.com/questions/39383557/show-distinct-column-values-in-pyspark-dataframe" target="_blank"&gt;https://stackoverflow.com/questions/39383557/show-distinct-column-values-in-pyspark-dataframe&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Not sure whether this helps or not, having never used this data framework myself.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Caute_Cautim&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sun, 13 Aug 2023 06:22:21 GMT</pubDate>
    <dc:creator>Caute_cautim</dc:creator>
    <dc:date>2023-08-13T06:22:21Z</dc:date>
    <!--
      Opening question of the thread, posted by ursyathi (URL .../m-p/61545#M924).
      guid repeats the link URL; with no isPermaLink attribute, RSS 2.0 readers
      treat the guid as a permalink.
      The description is entity-escaped HTML and is plain text to the XML parser.
      NOTE(review): this pubDate / dc:date (2023-10-09) is later than the reply
      item's (2023-08-13). Presumably it reflects a later edit of the post rather
      than its original posting time; confirm against the feed generator.
    -->
    <item>
      <title>Show distinct column values in pyspark dataframe</title>
      <link>https://community.isc2.org/t5/Governance-Risk-Compliance/Show-distinct-column-values-in-pyspark-dataframe/m-p/61545#M924</link>
      <description>&lt;P data-unlink="true"&gt;With pyspark&amp;nbsp; dataframe, how do you do the equivalent of Pandas&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;df['col'].unique().&lt;/P&gt;&lt;P&gt;I want to list out all the unique values in a pyspark dataframe column.&lt;/P&gt;&lt;P&gt;Not the SQL type way (registertemplate then SQL query for distinct values).&lt;/P&gt;&lt;P&gt;Also I don't need&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;groupby&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;then&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;countDistinct, instead I want to check distinct VALUES in that column.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 09 Oct 2023 10:41:26 GMT</pubDate>
      <guid>https://community.isc2.org/t5/Governance-Risk-Compliance/Show-distinct-column-values-in-pyspark-dataframe/m-p/61545#M924</guid>
      <dc:creator>ursyathi</dc:creator>
      <dc:date>2023-10-09T10:41:26Z</dc:date>
    </item>
    <!--
      Reply in the same thread, posted by Caute_cautim (URL .../m-p/61611#M926).
      Its link, description, pubDate, dc:creator and dc:date match the
      channel-level values at the top of this channel.
      guid repeats the link URL. The description is entity-escaped HTML that
      mentions @ursyathi and links to a Stack Overflow question.
      pubDate (RFC 822 style) and dc:date (ISO 8601) express the same instant.
    -->
    <item>
      <title>Re: Show distinct column values in pyspark dataframe</title>
      <link>https://community.isc2.org/t5/Governance-Risk-Compliance/Show-distinct-column-values-in-pyspark-dataframe/m-p/61611#M926</link>
      <description>&lt;P&gt;&lt;a href="https://community.isc2.org/t5/user/viewprofilepage/user-id/379800957"&gt;@ursyathi&lt;/a&gt;&amp;nbsp;&amp;nbsp; Have you checked Google?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://stackoverflow.com/questions/39383557/show-distinct-column-values-in-pyspark-dataframe" target="_blank"&gt;https://stackoverflow.com/questions/39383557/show-distinct-column-values-in-pyspark-dataframe&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Not sure whether this helps or not, having never used this data framework myself.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Caute_Cautim&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 13 Aug 2023 06:22:21 GMT</pubDate>
      <guid>https://community.isc2.org/t5/Governance-Risk-Compliance/Show-distinct-column-values-in-pyspark-dataframe/m-p/61611#M926</guid>
      <dc:creator>Caute_cautim</dc:creator>
      <dc:date>2023-08-13T06:22:21Z</dc:date>
    </item>
  </channel>
</rss>

