
Commit d66642e

SPARK-1822: Some minor cleanup work on SchemaRDD.count()

Minor cleanup following mesos#841.

Author: Reynold Xin <[email protected]>

Closes mesos#868 from rxin/schema-count and squashes the following commits:

5442651 [Reynold Xin] SPARK-1822: Some minor cleanup work on SchemaRDD.count()
1 parent 5c7faec

4 files changed (+10 -7 lines)


python/pyspark/sql.py
Lines changed: 4 additions & 1 deletion

@@ -323,7 +323,10 @@ def saveAsTable(self, tableName):
 
     def count(self):
         """
-        Return the number of elements in this RDD.
+        Return the number of elements in this RDD. Unlike the base RDD
+        implementation of count, this implementation leverages the query
+        optimizer to compute the count on the SchemaRDD, which supports
+        features such as filter pushdown.
 
         >>> srdd = sqlCtx.inferSchema(rdd)
         >>> srdd.count()

sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
Lines changed: 4 additions & 4 deletions

@@ -276,12 +276,12 @@ class SchemaRDD(
 
   /**
    * :: Experimental ::
-   * Overriding base RDD implementation to leverage query optimizer
+   * Return the number of elements in the RDD. Unlike the base RDD implementation of count, this
+   * implementation leverages the query optimizer to compute the count on the SchemaRDD, which
+   * supports features such as filter pushdown.
    */
   @Experimental
-  override def count(): Long = {
-    groupBy()(Count(Literal(1))).collect().head.getLong(0)
-  }
+  override def count(): Long = groupBy()(Count(Literal(1))).collect().head.getLong(0)
 
   /**
    * :: Experimental ::
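The one-line rewrite above is not just style: expressing count() as groupBy()(Count(Literal(1))) makes the count a regular aggregate node in the Catalyst logical plan, so the optimizer sees the whole query before execution. A minimal usage sketch, not part of this commit, assuming a Spark 1.x SQLContext and an already running SparkContext; the names sc, sqlContext, and Record are placeholders:

// Usage sketch (not from this commit). Assumes a Spark 1.x SQLContext and a
// running SparkContext `sc`; `Record` is a placeholder case class.
import org.apache.spark.sql.SQLContext

case class Record(key: Int, value: String)

val sqlContext = new SQLContext(sc)
import sqlContext._  // implicit RDD-to-SchemaRDD conversion plus the symbol DSL

val records = sc.parallelize((1 to 100).map(i => Record(i, "val_" + i)))

// where() adds a Filter node to the logical plan, and the overridden count()
// puts an aggregate on top, so the optimizer can rewrite the combined plan
// (e.g. push the filter down) instead of Spark scanning and counting
// materialized rows partition by partition as the base RDD.count() does.
val n: Long = records.where('key > 50).count()  // n == 50

On an empty SchemaRDD the same aggregate yields a single row containing 0, which is exactly what the renamed "zero count" test below asserts.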

sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
Lines changed: 1 addition & 1 deletion

@@ -124,7 +124,7 @@ class DslQuerySuite extends QueryTest {
   }
 
   test("zero count") {
-    assert(testData4.count() === 0)
+    assert(emptyTableData.count() === 0)
   }
 
   test("inner join where, one match per row") {

sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ object TestData {
     (1, null) ::
     (2, 2) :: Nil)
 
-  val testData4 = logical.LocalRelation('a.int, 'b.int)
+  val emptyTableData = logical.LocalRelation('a.int, 'b.int)
 
   case class UpperCaseData(N: Int, L: String)
   val upperCaseData =
