1,269
社区成员




- import org.apache.spark.mllib.linalg.{Vector, Vectors}
- // Create a dense vector (1.0, 0.0, 3.0).
- val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
- // Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values
- // corresponding to nonzero entries.
- val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
- // Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
- val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
- import org.apache.spark.mllib.linalg.Vectors
- import org.apache.spark.mllib.regression.LabeledPoint
- // Create a labeled point with a positive label and a dense feature vector.
- val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
- // Create a labeled point with a negative label and a sparse feature vector.
- val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
- import org.apache.spark.mllib.regression.LabeledPoint
- import org.apache.spark.mllib.util.MLUtils
- import org.apache.spark.rdd.RDD
- val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
- import org.apache.spark.mllib.linalg.{Matrix, Matrices}
- // Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)); the values are listed in column-major order.
- val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
- import org.apache.spark.mllib.linalg.Vector
- import org.apache.spark.mllib.linalg.distributed.RowMatrix
- val rows: RDD[Vector] = ... // an RDD of local vectors
- // Create a RowMatrix from an RDD[Vector].
- val mat: RowMatrix = new RowMatrix(rows)
- // Get its size.
- val m = mat.numRows()
- val n = mat.numCols()
- import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
- val rows: RDD[IndexedRow] = ... // an RDD of indexed rows
- // Create an IndexedRowMatrix from an RDD[IndexedRow].
- val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)
- // Get its size.
- val m = mat.numRows()
- val n = mat.numCols()
- // Drop its row indices.
- val rowMat: RowMatrix = mat.toRowMatrix()
- import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
- val entries: RDD[MatrixEntry] = ... // an RDD of matrix entries
- // Create a CoordinateMatrix from an RDD[MatrixEntry].
- val mat: CoordinateMatrix = new CoordinateMatrix(entries)
- // Get its size.
- val m = mat.numRows()
- val n = mat.numCols()
- // Convert it to an IndexedRowMatrix whose rows are sparse vectors.
- val indexedRowMatrix = mat.toIndexedRowMatrix()