1,269
社区成员




/**
 * Holder for a Spark broadcast variable containing the `columns` list read from a
 * properties file, with support for re-broadcasting it when it becomes stale.
 *
 * All reads/writes of the shared state that create or replace the broadcast are done
 * while holding this object's monitor, so `getInstance` and `updateAndGet` can be called
 * from different threads without broadcasting twice or unpersisting a value that another
 * caller has just published.
 */
object BroadcastWrapper {
  // Currently published broadcast; null until the first call that creates it.
  @volatile private var broadcast: Broadcast[List[String]] = null
  // Time of the last refresh performed by updateAndGet (initially: object initialization time).
  private var lastUpdatedTime: Date = Calendar.getInstance.getTime()
  // Age after which updateAndGet re-reads the file and re-broadcasts: 1 min = 60 s = 60000 ms.
  private val RefreshIntervalMs: Long = 60000L

  /**
   * Parses the `columns` entry of the properties file into a list.
   *
   * @param filePath path of the properties file to load
   * @return the value of the `columns` key split on ","
   * @throws java.io.IOException if the file cannot be opened or read
   * @throws NullPointerException if the file has no `columns` key
   */
  def getProperties(filePath: String = "/home/xxx/test.properties"): List[String] = {
    val fileStream = new FileInputStream(filePath)
    val prop = new Properties()
    // Always release the file handle, even when load() fails.
    try prop.load(fileStream)
    finally fileStream.close()
    val value = prop.getProperty("columns").split(",").toList
    println("value is *******************"+value)
    value
  }

  /**
   * Returns the broadcast, creating it from the properties file on first use.
   * The null check is repeated inside the lock so that only one caller creates it.
   *
   * @param sc       Spark context used to create the broadcast
   * @param filePath properties file read when the broadcast does not exist yet
   * @return the current broadcast of the column list
   */
  def getInstance(sc: SparkContext, filePath: String = "/home/z672898/lzw/test.properties"): Broadcast[List[String]] = {
    if (broadcast == null) {
      synchronized {
        if (broadcast == null) {
          broadcast = sc.broadcast(getProperties(filePath))
        }
      }
    }
    broadcast
  }

  /**
   * Returns the broadcast, first replacing it with a freshly loaded one when it does not
   * exist yet or when more than one minute has passed since the last refresh.
   *
   * @param sc       Spark context used to create the new broadcast
   * @param block    passed to `unpersist`: whether to wait until the old value has been
   *                 removed before continuing, or to remove it asynchronously
   * @param filePath properties file to re-read on refresh
   * @return the current (possibly just refreshed) broadcast of the column list
   */
  def updateAndGet(sc: SparkContext, block: Boolean = false, filePath: String = "/home/z672898/lzw/test.properties"): Broadcast[List[String]] = synchronized {
    val elapsedMs = Calendar.getInstance().getTime.getTime - lastUpdatedTime.getTime
    if (broadcast == null || elapsedMs > RefreshIntervalMs) {
      if (broadcast != null) {
        // Drop the stale copies before publishing the replacement.
        broadcast.unpersist(block)
      }
      val columns = getProperties(filePath)
      println("other broadcast:****************"+columns)
      broadcast = sc.broadcast(columns)
      // Record the refresh time so the next refresh happens one interval from now.
      lastUpdatedTime = Calendar.getInstance().getTime
    }
    broadcast
  }

  // Serialization helpers.
  // NOTE(review): these are plain public methods on the object; confirm how they are
  // expected to be invoked.

  /** Writes the current broadcast handle to `out`. */
  def writeObject(out: ObjectOutputStream): Unit = {
    out.writeObject(broadcast)
  }

  /** Reads a broadcast handle from `in` and installs it as the current broadcast. */
  def readObject(in: ObjectInputStream): Unit = synchronized {
    broadcast = in.readObject().asInstanceOf[Broadcast[List[String]]]
  }
}
在 YARN 模式下可以这样操作吗?我试过会报空指针异常。可不可以贴一下你成功运行的代码? 回复:可以在 Spark 的 driver 端创建一个线程,定时调用 sc.broadcast,测试过,可用。
可以在spark的driver端创建一个线程,定时调用sc.broadcast,测试过,可用