Sort

import static org.junit.Assert.*;

import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import com.holdenkarau.spark.testing.SharedJavaSparkContext;

/**
 * Spark test that reads space-separated integers from a text file, sorts them
 * numerically in ascending order, prints them and verifies the ordering.
 *
 * <p>The Spark context comes from {@link SharedJavaSparkContext}, which creates it
 * before the tests run and is responsible for shutting it down afterwards.
 *
 * <p>NOTE(review): the class name looks like a typo of {@code SortTest}; it is left
 * unchanged so the class/file name stays stable.
 */
public class SorkTest extends SharedJavaSparkContext {

	/**
	 * Text file containing the integers to sort, separated by single spaces.
	 * NOTE(review): this is a machine-specific absolute path, so the test can only
	 * run on a host where the file exists.
	 */
	private static final String INPUT_PATH = "D:\\Qtmp\\data\\sort.txt";

	/** Number of partitions requested for the sorted RDD. */
	private static final int SORT_PARTITIONS = 4;

	/** Session bound to the shared Spark context; recreated for every test. */
	private SparkSession spark;

	/**
	 * Obtains a {@link SparkSession} through the public builder API. The builder
	 * reuses the Spark context already started by the base class instead of
	 * creating a second one.
	 */
	@Before
	public void setUp() throws Exception {
		spark = SparkSession.builder().config(sc().getConf()).getOrCreate();
	}

	/**
	 * Drops the session reference. The shared Spark context is deliberately NOT
	 * stopped here: the base class owns its lifecycle, and stopping it after each
	 * test would leave later tests in the same class with a dead context.
	 */
	@After
	public void tearDown() throws Exception {
		spark = null;
	}

	/**
	 * Splits every line on spaces, sorts the resulting numbers in ascending order,
	 * prints each one and asserts that the collected sequence is non-decreasing.
	 */
	@Test
	public void test() {
		Dataset<String> data = spark.read().textFile(INPUT_PATH);

		int previous = Integer.MIN_VALUE;
		for (String number : data.javaRDD()
				.flatMap(line -> Arrays.asList(line.split(" ")).iterator())
				// blank lines and repeated spaces produce empty tokens that cannot be parsed
				.filter(token -> !token.isEmpty())
				.sortBy(token -> Integer.valueOf(token), true, SORT_PARTITIONS)
				.collect()) {
			System.out.println(number);

			int current = Integer.parseInt(number);
			assertTrue("values out of order: " + previous + " before " + current,
					previous <= current);
			previous = current;
		}
	}

}

pom.xml

	<!-- Central place for the encoding and the library versions used throughout this POM. -->
	<properties>
		<!-- Source file encoding; also handed to maven-compiler-plugin below. -->
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<!-- Version shared by all org.slf4j artifacts. -->
		<slf4j.version>1.7.25</slf4j.version>
		<!-- Version shared by all org.apache.logging.log4j artifacts. -->
		<log4j2.version>2.9.1</log4j2.version>
		<!-- Version shared by the org.apache.spark artifacts (Scala 2.11 builds). -->
		<spark.version>2.2.0</spark.version>
	</properties>

	<!--
		Build configuration: compiles for Java 1.8 and, during the package phase,
		builds an assembly and copies the dependencies into target/lib.
		Both packaging plugins are bound to the same phase, so their order here matters.
	-->
	<build>
		<plugins>
			<!-- Compile sources as Java 1.8 using the project-wide source encoding. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.0</version>
				<configuration>
					<encoding>${project.build.sourceEncoding}</encoding>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
			<!--
				Build the distributable described by descriptor.xml. The manifest names the
				main class and gets a Class-Path that lists the dependencies under lib/ plus
				the current directory ("./").
			-->
			<plugin>
				<artifactId>maven-assembly-plugin</artifactId>
				<version>3.1.0</version>
				<configuration>
					<descriptors>
						<descriptor>descriptor.xml</descriptor>
					</descriptors>
					<archive>
						<manifest>
							<mainClass>newegg.tw.test.Test03.Run</mainClass>
							<addClasspath>true</addClasspath>
							<classpathPrefix>lib/</classpathPrefix>
						</manifest>
						<manifestEntries>
    						<Class-Path>./</Class-Path>
  						</manifestEntries>						
					</archive>
				</configuration>
				<executions>
					<!-- Run the "single" goal automatically as part of "mvn package". -->
					<execution>
						<id>make-assembly</id>
						<phase>package</phase>
						<goals>
							<goal>single</goal>
						</goals>
					</execution>
				</executions>

			</plugin>

			<!--
				Copy the runtime-scope dependencies into ${project.build.directory}/lib
				during the package phase; this is the lib/ directory that the manifest
				Class-Path prefix above points at.
			-->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-dependency-plugin</artifactId>
				<version>3.0.2</version>
				<executions>
					<execution>
						<id>copy-dependencies</id>
						<phase>package</phase>
						<goals>
							<goal>copy-dependencies</goal>
						</goals>
						<configuration>							
							<includeScope>runtime</includeScope>							
							<outputDirectory>${project.build.directory}/lib</outputDirectory>
							<!-- Existing release/snapshot jars are kept unless the source is newer. -->
							<overWriteReleases>false</overWriteReleases>
							<overWriteSnapshots>false</overWriteSnapshots>
							<overWriteIfNewer>true</overWriteIfNewer>
						</configuration>
					</execution>
				</executions>
			</plugin>

		</plugins>
	</build>

	<!--
		Project dependencies, grouped as: SLF4J API and bridges, Apache Spark
		(provided scope), the Cassandra connector, and test-only libraries.
		The concrete logging backend is selected by the profiles section.
	-->
	<dependencies>

		<!-- Logging facade plus bridges that route commons-logging and java.util.logging to SLF4J -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-api</artifactId>
			<version>${slf4j.version}</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>jcl-over-slf4j</artifactId>
			<version>${slf4j.version}</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>jul-to-slf4j</artifactId>
			<version>${slf4j.version}</version>
		</dependency>

		<!-- Apache Spark modules, declared with provided scope -->
		<dependency>
			<groupId>org.apache.spark</groupId>
			<artifactId>spark-core_2.11</artifactId>
			<version>${spark.version}</version>
			<scope>provided</scope>
			<!-- Keep log4j 1.x and its SLF4J binding out of the classpath -->
			<exclusions>
				<exclusion>
					<groupId>log4j</groupId>
					<artifactId>log4j</artifactId>
				</exclusion>
				<exclusion>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>org.apache.spark</groupId>
			<artifactId>spark-sql_2.11</artifactId>
			<version>${spark.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.spark</groupId>
			<artifactId>spark-mllib_2.11</artifactId>
			<version>${spark.version}</version>
			<scope>provided</scope>
		</dependency>

		<!-- Spark connector for Cassandra (default compile scope) -->
		<dependency>
			<groupId>com.datastax.spark</groupId>
			<artifactId>spark-cassandra-connector_2.11</artifactId>
			<version>2.0.6</version>
		</dependency>

		<!-- Test-only libraries -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>com.holdenkarau</groupId>
			<artifactId>spark-testing-base_2.11</artifactId>
			<version>2.2.0_0.8.0</version>
			<scope>test</scope>
			<!-- Same logging exclusions as spark-core, plus commons-logging (replaced by jcl-over-slf4j) -->
			<exclusions>
				<exclusion>
					<groupId>log4j</groupId>
					<artifactId>log4j</artifactId>
				</exclusion>
				<exclusion>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
				</exclusion>
				<exclusion>
					<groupId>commons-logging</groupId>
					<artifactId>commons-logging</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
	</dependencies>

	<!--
		Logging backend selection. Exactly one SLF4J binding should be on the
		classpath: "log4j-2" is active by default, "log4j-1.2" must be requested
		explicitly (for example with -P log4j-1.2).
	-->
	<profiles>
		<!-- Alternative backend: SLF4J bound to log4j 1.2. -->
		<profile>
			<id>log4j-1.2</id>
			<dependencies>
				<!-- log4j 1.2 -->
				<dependency>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
					<version>${slf4j.version}</version>
				</dependency>
			</dependencies>
		</profile>
		<!-- Default backend: SLF4J bound to Log4j 2. -->
		<profile>
			<id>log4j-2</id>
			<activation>
				<activeByDefault>true</activeByDefault>
			</activation>
			<dependencies>
				<!-- log4j2 -->
				<!-- SLF4J binding for Log4j 2. -->
				<dependency>
					<groupId>org.apache.logging.log4j</groupId>
					<artifactId>log4j-slf4j-impl</artifactId>
					<version>${log4j2.version}</version>
				</dependency>
				<!-- Log4j 2 implementation. -->
				<dependency>
					<groupId>org.apache.logging.log4j</groupId>
					<artifactId>log4j-core</artifactId>
					<version>${log4j2.version}</version>
				</dependency>
				<!-- Bridge exposing the log4j 1.2 API on top of Log4j 2. -->
				<dependency>
					<groupId>org.apache.logging.log4j</groupId>
					<artifactId>log4j-1.2-api</artifactId>
					<version>${log4j2.version}</version>
				</dependency>
				<!-- Log4j 2 NoSQL appenders. -->
				<dependency>
   					<groupId>org.apache.logging.log4j</groupId>
    				<artifactId>log4j-nosql</artifactId>
    				<version>${log4j2.version}</version>
				</dependency>
				
				<!-- NOTE(review): presumably needed by the log4j-nosql Cassandra appender; confirm why it is declared only in this profile. -->
				<dependency>
  					<groupId>com.datastax.cassandra</groupId>
  					<artifactId>cassandra-driver-core</artifactId>
  					<version>3.4.0</version>
				</dependency>				
			</dependencies>
		</profile>
	</profiles>