MyException - 我的异常网
当前位置:我的异常网 » 综合 » 基数预估算法 错误率验证

基数预估算法 错误率验证

www.MyException.Cn  网友分享于:2013-12-30  浏览:0次
基数预估算法 错误率验证
package hllctest

import java.util

import org.scalatest.{FlatSpec}
import org.spark.sqludf.HLLCounter

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Random

/**
 * Measures how far an HLLCounter-based intersection estimate deviates from the exact
 * intersection size of two randomly generated string sets.
 *
 * For every (overlap ratio `r`, set size) combination two sets of distinct random strings are
 * drawn from a population of `setSize / r` values, so the expected share of elements of one set
 * that also occur in the other is about `r`. The exact intersection is then compared with the
 * inclusion-exclusion estimate `|A| + |B| - |A ∪ B|` computed from `HLLCounter` estimates.
 * Results are only printed; nothing is asserted.
 */
class HllcCrossSetTest extends FlatSpec {

  /** Shared random source for all generated test data (unseeded, so runs are not reproducible). */
  val ramdom = new Random()

  /** Argument passed to every `HLLCounter` constructor; presumably the precision — TODO confirm. */
  val m = 18

  /**
   * Runs [[errorRate]] for the overlap ratios 0.01, 0.05, 0.1, 0.2 and 0.5.
   *
   * @param sampleCount number of distinct elements generated for each of the two sets
   */
  def errorRateCal(sampleCount: Int): Unit =
    Seq(0.01, 0.05, 0.1, 0.2, 0.5).foreach(r => errorRate(r, sampleCount))


  "hllc test" should "hllc merge,mix error rate" in {
    // NOTE(review): the last three sizes repeat 100000/200000/500000 exactly as in the original
    // run list; they are kept so that a run still produces the same sequence of reports.
    Seq(1000, 2000, 5000, 10000, 20000, 50000, 100000, 200000, 500000, 100000, 200000, 500000)
      .foreach(size => errorRateCal(size))
  }


  /**
   * Sampling without replacement: adds exactly one new random string to `setCollection`.
   *
   * Candidates are drawn with [[getTestString]] until one is not yet contained in the set.
   * A loop is used instead of recursion so that a long run of collisions cannot overflow
   * the stack. The call does not terminate if the set already holds the whole population.
   *
   * @param setCollection set that receives the new element
   * @param totalIntArray decimal digits of the population size, see [[getTestString]]
   */
  def getRandomStr(setCollection: mutable.HashSet[String], totalIntArray: Array[Int]): Unit = {
    // HashSet.add returns false when the element was already present, so retry until it succeeds.
    while (!setCollection.add(getTestString(totalIntArray))) {}
  }


  /**
   * Generates two sets of `testSetLength` distinct strings from a population of
   * `testSetLength / r` values, then prints the exact intersection, the HLLCounter-based
   * intersection estimate, their difference and the time each step took.
   *
   * @param r             target overlap ratio, must be in (0, 1]
   * @param testSetLength number of distinct elements per set, must not be negative
   * @throws IllegalArgumentException if `r` or `testSetLength` is out of range
   */
  def errorRate(r: Double, testSetLength: Int): Unit = {
    // r > 1 would make the population smaller than the requested number of distinct elements,
    // so the sampling loop could never finish; r <= 0 gives a meaningless population size.
    require(r > 0 && r <= 1, s"r must be in (0, 1] but was $r")
    require(testSetLength >= 0, s"testSetLength must not be negative but was $testSetLength")

    println(s" ********************begin r :{$r},testSetLength:{$testSetLength} ,m:{$m} ***********************")
    val setA = new mutable.HashSet[String]()
    val setB = new mutable.HashSet[String]()
    val tatolCount = (testSetLength / r).toInt
    val generateStart = System.currentTimeMillis()

    // Decimal digits of the population size, most significant digit first.
    val totalIntArray = tatolCount.toString.toCharArray.map(_.asDigit)

    for (_ <- 0 until testSetLength) {
      getRandomStr(setA, totalIntArray)
      getRandomStr(setB, totalIntArray)
    }

    println(s"tatolCount: ${tatolCount}  ,r : ${r}  setA size: ${setA.size} , setB size: ${setB.size} ")
    println(s" generate data cost time: ${System.currentTimeMillis() - generateStart}  ")

    // Exact intersection ("mix") computed with hash sets.
    val exactStart = System.currentTimeMillis()
    val realMixCnt = realMix(setA, setB)
    val mixRate = realMixRate(realMixCnt, setA)
    println(s" realMixCnt: ${realMixCnt} , mixRate:${mixRate}")
    println(s" Map collection cost time: ${System.currentTimeMillis() - exactStart}  ")

    // Intersection estimated from HLLCounter cardinalities.
    val hllcStart = System.currentTimeMillis()
    val hllcMixCnt = hllcMix(setA, setB)
    val mixRatehllc = realMixRate(hllcMixCnt, setA)
    println(s" hllcMixCnt: ${hllcMixCnt} , mixRatehllc:${mixRatehllc}")
    val distinct = mixRatehllc - mixRate
    println(f" mixRatehllc - mixRate:  $distinct%1.6f ")
    println(s" hllcMixCnt - realMixCnt:  ${hllcMixCnt - realMixCnt} ")
    println(s" hllc cost time: ${System.currentTimeMillis() - hllcStart}  ")
  }


  /**
   * Exact size of the intersection of the two sets.
   *
   * @return number of elements of `setB` that are also contained in `setA`
   */
  def realMix(setA: mutable.HashSet[String], setB: mutable.HashSet[String]): Int =
    setB.count(setA.contains)


  /**
   * Share of `set` covered by an intersection of `mixCount` elements.
   *
   * @return `mixCount / set.size` as a floating point value (NaN or infinite for an empty set)
   */
  def realMixRate(mixCount: Int, set: mutable.HashSet[String]): Double =
    mixCount * 1.0 / set.size


  /** Same as the `Int` overload, for the `Long` counts returned by [[hllcMix]]. */
  def realMixRate(mixCount: Long, set: mutable.HashSet[String]): Double =
    mixCount * 1.0 / set.size


  /**
   * Intersection size estimated by inclusion-exclusion: `est(A) + est(B) - est(A ∪ B)`.
   *
   * The second counter is filled from `setB`; it used to be filled from `setA` a second time,
   * which made the result `2 * est(A) - est(A ∪ B)`.
   *
   * @return estimated number of common elements; may be negative because the three
   *         estimates carry independent errors
   */
  def hllcMix(setA: mutable.HashSet[String], setB: mutable.HashSet[String]): Long =
    countEstimate(setA) + countEstimate(setB) - hllcMerge(setA, setB)


  /**
   * Estimated size of the union, obtained by adding the elements of both sets to one counter.
   */
  def hllcMerge(setA: mutable.HashSet[String], setB: mutable.HashSet[String]) =
    countEstimate(setA, setB)


  /** Adds every element of the given collections to a fresh `HLLCounter(m)` and returns its estimate. */
  private def countEstimate(collections: Iterable[String]*) = {
    val counter = new HLLCounter(m)
    collections.foreach(_.foreach(item => counter.add(item)))
    counter.getCountEstimate
  }


  /**
   * Exact size of the union of two buffers (duplicates counted once).
   */
  def realMerge(setA: ArrayBuffer[String], setB: ArrayBuffer[String]): Int =
    (setA.iterator ++ setB.iterator).toSet.size


  /**
   * Draws one string uniformly from a population of exactly `total` values, where `total` is
   * the number whose decimal digits are given in `totalCountArray`.
   *
   * The drawn number is zero-padded to `totalCountArray.length` digits and every digit 0-9 is
   * written as the letter 'a'-'j'. Previously each position was drawn independently (with a
   * 0 digit treated as 10), which made the population the product of the digits instead of
   * `total`: 25000 produced only 2*5*10*10*10 = 10000 distinct strings.
   *
   * @param totalCountArray decimal digits of the population size, most significant first
   * @throws IllegalArgumentException if the digits do not describe a value in [1, Int.MaxValue]
   */
  def getTestString(totalCountArray: Array[Int]): String = {
    val total = totalCountArray.foldLeft(0L)((acc, digit) => acc * 10 + digit)
    require(total > 0 && total <= Int.MaxValue, s"population size must be in [1, ${Int.MaxValue}] but was $total")

    val digits = ramdom.nextInt(total.toInt).toString
    // The drawn value is < total, so it never has more digits than the array has positions.
    val padded = ("0" * (totalCountArray.length - digits.length)) + digits
    padded.map(c => (c - '0' + 97).toChar)
  }


  /**
   * Random string of `length` letters from 'a' to 'j', i.e. one of 10^length possible values.
   */
  def getRamdomString(length: Int): String =
    (0 until length).map(_ => (ramdom.nextInt(10) + 97).toChar).mkString


  /**
   * Single random letter taken from the first `l` letters of the alphabet starting at 'a'.
   *
   * @param l number of letters to choose from; must be positive (`Random.nextInt` rejects other values)
   */
  def getRamdomStringS(l: Int): String =
    (ramdom.nextInt(l) + 97).toChar.toString


}


********************begin r :{0.01},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.01  setA size: 1000 , setB size: 1000
generate data cost time: 48 
realMixCnt: 12 , mixRate:0.012
Map collection cost time: 6 
hllcMixCnt: 8 , mixRatehllc:0.008
mixRatehllc - mixRate:  -0.004000
hllcMixCnt - realMixCnt:  -4
hllc cost time: 131 
********************begin r :{0.05},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 20000  ,r : 0.05  setA size: 1000 , setB size: 1000
generate data cost time: 4 
realMixCnt: 49 , mixRate:0.049
Map collection cost time: 3 
hllcMixCnt: 48 , mixRatehllc:0.048
mixRatehllc - mixRate:  -0.001000
hllcMixCnt - realMixCnt:  -1
hllc cost time: 19 
********************begin r :{0.1},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 10000  ,r : 0.1  setA size: 1000 , setB size: 1000
generate data cost time: 3 
realMixCnt: 108 , mixRate:0.108
Map collection cost time: 5 
hllcMixCnt: 107 , mixRatehllc:0.107
mixRatehllc - mixRate:  -0.001000
hllcMixCnt - realMixCnt:  -1
hllc cost time: 15 
********************begin r :{0.2},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 5000  ,r : 0.2  setA size: 1000 , setB size: 1000
generate data cost time: 3 
realMixCnt: 196 , mixRate:0.196
Map collection cost time: 1 
hllcMixCnt: 195 , mixRatehllc:0.195
mixRatehllc - mixRate:  -0.001000
hllcMixCnt - realMixCnt:  -1
hllc cost time: 16 
********************begin r :{0.5},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 2000  ,r : 0.5  setA size: 1000 , setB size: 1000
generate data cost time: 7 
realMixCnt: 489 , mixRate:0.489
Map collection cost time: 1 
hllcMixCnt: 490 , mixRatehllc:0.49
mixRatehllc - mixRate:  0.001000
hllcMixCnt - realMixCnt:  1
hllc cost time: 8 
********************begin r :{0.01},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.01  setA size: 2000 , setB size: 2000
generate data cost time: 6 
realMixCnt: 11 , mixRate:0.0055
Map collection cost time: 0 
hllcMixCnt: 19 , mixRatehllc:0.0095
mixRatehllc - mixRate:  0.004000
hllcMixCnt - realMixCnt:  8
hllc cost time: 24 
********************begin r :{0.05},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 40000  ,r : 0.05  setA size: 2000 , setB size: 2000
generate data cost time: 5 
realMixCnt: 102 , mixRate:0.051
Map collection cost time: 1 
hllcMixCnt: 110 , mixRatehllc:0.055
mixRatehllc - mixRate:  0.004000
hllcMixCnt - realMixCnt:  8
hllc cost time: 11 
********************begin r :{0.1},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 20000  ,r : 0.1  setA size: 2000 , setB size: 2000
generate data cost time: 4 
realMixCnt: 192 , mixRate:0.096
Map collection cost time: 0 
hllcMixCnt: 192 , mixRatehllc:0.096
mixRatehllc - mixRate:  0.000000
hllcMixCnt - realMixCnt:  0
hllc cost time: 11 
********************begin r :{0.2},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 10000  ,r : 0.2  setA size: 2000 , setB size: 2000
generate data cost time: 3 
realMixCnt: 395 , mixRate:0.1975
Map collection cost time: 1 
hllcMixCnt: 387 , mixRatehllc:0.1935
mixRatehllc - mixRate:  -0.004000
hllcMixCnt - realMixCnt:  -8
hllc cost time: 12 
********************begin r :{0.5},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 4000  ,r : 0.5  setA size: 2000 , setB size: 2000
generate data cost time: 6 
realMixCnt: 986 , mixRate:0.493
Map collection cost time: 1 
hllcMixCnt: 981 , mixRatehllc:0.4905
mixRatehllc - mixRate:  -0.002500
hllcMixCnt - realMixCnt:  -5
hllc cost time: 16 
********************begin r :{0.01},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.01  setA size: 5000 , setB size: 5000
generate data cost time: 17 
realMixCnt: 48 , mixRate:0.0096
Map collection cost time: 2 
hllcMixCnt: 41 , mixRatehllc:0.0082
mixRatehllc - mixRate:  -0.001400
hllcMixCnt - realMixCnt:  -7
hllc cost time: 13 
********************begin r :{0.05},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.05  setA size: 5000 , setB size: 5000
generate data cost time: 7 
realMixCnt: 263 , mixRate:0.0526
Map collection cost time: 1 
hllcMixCnt: 271 , mixRatehllc:0.0542
mixRatehllc - mixRate:  0.001600
hllcMixCnt - realMixCnt:  8
hllc cost time: 13 
********************begin r :{0.1},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 50000  ,r : 0.1  setA size: 5000 , setB size: 5000
generate data cost time: 6 
realMixCnt: 527 , mixRate:0.1054
Map collection cost time: 0 
hllcMixCnt: 526 , mixRatehllc:0.1052
mixRatehllc - mixRate:  -0.000200
hllcMixCnt - realMixCnt:  -1
hllc cost time: 26 
********************begin r :{0.2},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 25000  ,r : 0.2  setA size: 5000 , setB size: 5000
generate data cost time: 16 
realMixCnt: 2505 , mixRate:0.501
Map collection cost time: 4 
hllcMixCnt: 2496 , mixRatehllc:0.4992
mixRatehllc - mixRate:  -0.001800
hllcMixCnt - realMixCnt:  -9
hllc cost time: 16 
********************begin r :{0.5},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 10000  ,r : 0.5  setA size: 5000 , setB size: 5000
generate data cost time: 8 
realMixCnt: 2499 , mixRate:0.4998
Map collection cost time: 1 
hllcMixCnt: 2505 , mixRatehllc:0.501
mixRatehllc - mixRate:  0.001200
hllcMixCnt - realMixCnt:  6
hllc cost time: 14 
********************begin r :{0.01},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.01  setA size: 10000 , setB size: 10000
generate data cost time: 15 
realMixCnt: 103 , mixRate:0.0103
Map collection cost time: 2 
hllcMixCnt: 62 , mixRatehllc:0.0062
mixRatehllc - mixRate:  -0.004100
hllcMixCnt - realMixCnt:  -41
hllc cost time: 24 
********************begin r :{0.05},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.05  setA size: 10000 , setB size: 10000
generate data cost time: 18 
realMixCnt: 484 , mixRate:0.0484
Map collection cost time: 2 
hllcMixCnt: 467 , mixRatehllc:0.0467
mixRatehllc - mixRate:  -0.001700
hllcMixCnt - realMixCnt:  -17
hllc cost time: 18 
********************begin r :{0.1},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.1  setA size: 10000 , setB size: 10000
generate data cost time: 11 
realMixCnt: 938 , mixRate:0.0938
Map collection cost time: 1 
hllcMixCnt: 967 , mixRatehllc:0.0967
mixRatehllc - mixRate:  0.002900
hllcMixCnt - realMixCnt:  29
hllc cost time: 12 
********************begin r :{0.2},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 50000  ,r : 0.2  setA size: 10000 , setB size: 10000
generate data cost time: 10 
realMixCnt: 1997 , mixRate:0.1997
Map collection cost time: 5 
hllcMixCnt: 1999 , mixRatehllc:0.1999
mixRatehllc - mixRate:  0.000200
hllcMixCnt - realMixCnt:  2
hllc cost time: 15 
********************begin r :{0.5},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 20000  ,r : 0.5  setA size: 10000 , setB size: 10000
generate data cost time: 18 
realMixCnt: 5010 , mixRate:0.501
Map collection cost time: 4 
hllcMixCnt: 4990 , mixRatehllc:0.499
mixRatehllc - mixRate:  -0.002000
hllcMixCnt - realMixCnt:  -20
hllc cost time: 23 
********************begin r :{0.01},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.01  setA size: 20000 , setB size: 20000
generate data cost time: 41 
realMixCnt: 218 , mixRate:0.0109
Map collection cost time: 4 
hllcMixCnt: 134 , mixRatehllc:0.0067
mixRatehllc - mixRate:  -0.004200
hllcMixCnt - realMixCnt:  -84
hllc cost time: 36 
********************begin r :{0.05},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 400000  ,r : 0.05  setA size: 20000 , setB size: 20000
generate data cost time: 19 
realMixCnt: 946 , mixRate:0.0473
Map collection cost time: 3 
hllcMixCnt: 949 , mixRatehllc:0.04745
mixRatehllc - mixRate:  0.000150
hllcMixCnt - realMixCnt:  3
hllc cost time: 23 
********************begin r :{0.1},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.1  setA size: 20000 , setB size: 20000
generate data cost time: 26 
realMixCnt: 2001 , mixRate:0.10005
Map collection cost time: 10 
hllcMixCnt: 2080 , mixRatehllc:0.104
mixRatehllc - mixRate:  0.003950
hllcMixCnt - realMixCnt:  79
hllc cost time: 56 
********************begin r :{0.2},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.2  setA size: 20000 , setB size: 20000
generate data cost time: 28 
realMixCnt: 4034 , mixRate:0.2017
Map collection cost time: 7 
hllcMixCnt: 4113 , mixRatehllc:0.20565
mixRatehllc - mixRate:  0.003950
hllcMixCnt - realMixCnt:  79
hllc cost time: 25 
********************begin r :{0.5},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 40000  ,r : 0.5  setA size: 20000 , setB size: 20000
generate data cost time: 24 
realMixCnt: 9975 , mixRate:0.49875
Map collection cost time: 8 
hllcMixCnt: 9994 , mixRatehllc:0.4997
mixRatehllc - mixRate:  0.000950
hllcMixCnt - realMixCnt:  19
hllc cost time: 23 
********************begin r :{0.01},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 5000000  ,r : 0.01  setA size: 50000 , setB size: 50000
generate data cost time: 88 
realMixCnt: 468 , mixRate:0.00936
Map collection cost time: 48 
hllcMixCnt: 603 , mixRatehllc:0.01206
mixRatehllc - mixRate:  0.002700
hllcMixCnt - realMixCnt:  135
hllc cost time: 163 
********************begin r :{0.05},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.05  setA size: 50000 , setB size: 50000
generate data cost time: 99 
realMixCnt: 2381 , mixRate:0.04762
Map collection cost time: 17 
hllcMixCnt: 2335 , mixRatehllc:0.0467
mixRatehllc - mixRate:  -0.000920
hllcMixCnt - realMixCnt:  -46
hllc cost time: 59 
********************begin r :{0.1},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.1  setA size: 50000 , setB size: 50000
generate data cost time: 52 
realMixCnt: 5091 , mixRate:0.10182
Map collection cost time: 19 
hllcMixCnt: 5116 , mixRatehllc:0.10232
mixRatehllc - mixRate:  0.000500
hllcMixCnt - realMixCnt:  25
hllc cost time: 88 
********************begin r :{0.2},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 250000  ,r : 0.2  setA size: 50000 , setB size: 50000
generate data cost time: 72 
realMixCnt: 24889 , mixRate:0.49778
Map collection cost time: 18 
hllcMixCnt: 25002 , mixRatehllc:0.50004
mixRatehllc - mixRate:  0.002260
hllcMixCnt - realMixCnt:  113
hllc cost time: 61 
********************begin r :{0.5},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.5  setA size: 50000 , setB size: 50000
generate data cost time: 86 
realMixCnt: 25140 , mixRate:0.5028
Map collection cost time: 15 
hllcMixCnt: 25190 , mixRatehllc:0.5038
mixRatehllc - mixRate:  0.001000
hllcMixCnt - realMixCnt:  50
hllc cost time: 71 
********************begin r :{0.01},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.01  setA size: 100000 , setB size: 100000
generate data cost time: 154 
realMixCnt: 1051 , mixRate:0.01051
Map collection cost time: 29 
hllcMixCnt: 811 , mixRatehllc:0.00811
mixRatehllc - mixRate:  -0.002400
hllcMixCnt - realMixCnt:  -240
hllc cost time: 232 
********************begin r :{0.05},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.05  setA size: 100000 , setB size: 100000
generate data cost time: 171 
realMixCnt: 4903 , mixRate:0.04903
Map collection cost time: 19 
hllcMixCnt: 5095 , mixRatehllc:0.05095
mixRatehllc - mixRate:  0.001920
hllcMixCnt - realMixCnt:  192
hllc cost time: 122 
********************begin r :{0.1},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.1  setA size: 100000 , setB size: 100000
generate data cost time: 131 
realMixCnt: 9931 , mixRate:0.09931
Map collection cost time: 42 
hllcMixCnt: 10136 , mixRatehllc:0.10136
mixRatehllc - mixRate:  0.002050
hllcMixCnt - realMixCnt:  205
hllc cost time: 155 
********************begin r :{0.2},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.2  setA size: 100000 , setB size: 100000
generate data cost time: 117 
realMixCnt: 20148 , mixRate:0.20148
Map collection cost time: 35 
hllcMixCnt: 20414 , mixRatehllc:0.20414
mixRatehllc - mixRate:  0.002660
hllcMixCnt - realMixCnt:  266
hllc cost time: 111 
********************begin r :{0.5},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.5  setA size: 100000 , setB size: 100000
generate data cost time: 130 
realMixCnt: 49964 , mixRate:0.49964
Map collection cost time: 35 
hllcMixCnt: 50268 , mixRatehllc:0.50268
mixRatehllc - mixRate:  0.003040
hllcMixCnt - realMixCnt:  304
hllc cost time: 133 
********************begin r :{0.01},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 20000000  ,r : 0.01  setA size: 200000 , setB size: 200000
generate data cost time: 260 
realMixCnt: 2035 , mixRate:0.010175
Map collection cost time: 83 
hllcMixCnt: 1247 , mixRatehllc:0.006235
mixRatehllc - mixRate:  -0.003940
hllcMixCnt - realMixCnt:  -788
hllc cost time: 389 
********************begin r :{0.05},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 4000000  ,r : 0.05  setA size: 200000 , setB size: 200000
generate data cost time: 311 
realMixCnt: 10159 , mixRate:0.050795
Map collection cost time: 94 
hllcMixCnt: 10030 , mixRatehllc:0.05015
mixRatehllc - mixRate:  -0.000645
hllcMixCnt - realMixCnt:  -129
hllc cost time: 308 
********************begin r :{0.1},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.1  setA size: 200000 , setB size: 200000
generate data cost time: 255 
realMixCnt: 20009 , mixRate:0.100045
Map collection cost time: 133 
hllcMixCnt: 19539 , mixRatehllc:0.097695
mixRatehllc - mixRate:  -0.002350
hllcMixCnt - realMixCnt:  -470
hllc cost time: 235 
********************begin r :{0.2},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.2  setA size: 200000 , setB size: 200000
generate data cost time: 229 
realMixCnt: 39946 , mixRate:0.19973
Map collection cost time: 92 
hllcMixCnt: 41310 , mixRatehllc:0.20655
mixRatehllc - mixRate:  0.006820
hllcMixCnt - realMixCnt:  1364
hllc cost time: 271 
********************begin r :{0.5},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 400000  ,r : 0.5  setA size: 200000 , setB size: 200000
generate data cost time: 357 
realMixCnt: 100095 , mixRate:0.500475
Map collection cost time: 93 
hllcMixCnt: 100242 , mixRatehllc:0.50121
mixRatehllc - mixRate:  0.000735
hllcMixCnt - realMixCnt:  147
hllc cost time: 422 
********************begin r :{0.01},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 50000000  ,r : 0.01  setA size: 500000 , setB size: 500000
generate data cost time: 758 
realMixCnt: 5084 , mixRate:0.010168
Map collection cost time: 211 
hllcMixCnt: 2978 , mixRatehllc:0.005956
mixRatehllc - mixRate:  -0.004212
hllcMixCnt - realMixCnt:  -2106
hllc cost time: 844 
********************begin r :{0.05},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.05  setA size: 500000 , setB size: 500000
generate data cost time: 721 
realMixCnt: 25296 , mixRate:0.050592
Map collection cost time: 222 
hllcMixCnt: 23440 , mixRatehllc:0.04688
mixRatehllc - mixRate:  -0.003712
hllcMixCnt - realMixCnt:  -1856
hllc cost time: 699 
********************begin r :{0.1},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 5000000  ,r : 0.1  setA size: 500000 , setB size: 500000
generate data cost time: 688 
realMixCnt: 50178 , mixRate:0.100356
Map collection cost time: 200 
hllcMixCnt: 45070 , mixRatehllc:0.09014
mixRatehllc - mixRate:  -0.010216
hllcMixCnt - realMixCnt:  -5108
hllc cost time: 701 
********************begin r :{0.2},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 2500000  ,r : 0.2  setA size: 500000 , setB size: 500000
generate data cost time: 897 
realMixCnt: 249899 , mixRate:0.499798
Map collection cost time: 223 
hllcMixCnt: 250263 , mixRatehllc:0.500526
mixRatehllc - mixRate:  0.000728
hllcMixCnt - realMixCnt:  364
hllc cost time: 658 
********************begin r :{0.5},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.5  setA size: 500000 , setB size: 500000
generate data cost time: 868 
realMixCnt: 249895 , mixRate:0.49979
Map collection cost time: 245 
hllcMixCnt: 249916 , mixRatehllc:0.499832
mixRatehllc - mixRate:  0.000042
hllcMixCnt - realMixCnt:  21
hllc cost time: 724 
********************begin r :{0.01},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.01  setA size: 100000 , setB size: 100000
generate data cost time: 110 
realMixCnt: 1026 , mixRate:0.01026
Map collection cost time: 28 
hllcMixCnt: 569 , mixRatehllc:0.00569
mixRatehllc - mixRate:  -0.004570
hllcMixCnt - realMixCnt:  -457
hllc cost time: 95 
********************begin r :{0.05},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.05  setA size: 100000 , setB size: 100000
generate data cost time: 91 
realMixCnt: 5024 , mixRate:0.05024
Map collection cost time: 26 
hllcMixCnt: 5439 , mixRatehllc:0.05439
mixRatehllc - mixRate:  0.004150
hllcMixCnt - realMixCnt:  415
hllc cost time: 131 
********************begin r :{0.1},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.1  setA size: 100000 , setB size: 100000
generate data cost time: 93 
realMixCnt: 9925 , mixRate:0.09925
Map collection cost time: 28 
hllcMixCnt: 10201 , mixRatehllc:0.10201
mixRatehllc - mixRate:  0.002760
hllcMixCnt - realMixCnt:  276
hllc cost time: 141 
********************begin r :{0.2},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.2  setA size: 100000 , setB size: 100000
generate data cost time: 90 
realMixCnt: 19983 , mixRate:0.19983
Map collection cost time: 32 
hllcMixCnt: 19936 , mixRatehllc:0.19936
mixRatehllc - mixRate:  -0.000470
hllcMixCnt - realMixCnt:  -47
hllc cost time: 128 
********************begin r :{0.5},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.5  setA size: 100000 , setB size: 100000
generate data cost time: 121 
realMixCnt: 50027 , mixRate:0.50027
Map collection cost time: 35 
hllcMixCnt: 49726 , mixRatehllc:0.49726
mixRatehllc - mixRate:  -0.003010
hllcMixCnt - realMixCnt:  -301
hllc cost time: 137 
********************begin r :{0.01},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 20000000  ,r : 0.01  setA size: 200000 , setB size: 200000
generate data cost time: 247 
realMixCnt: 1991 , mixRate:0.009955
Map collection cost time: 38 
hllcMixCnt: 2118 , mixRatehllc:0.01059
mixRatehllc - mixRate:  0.000635
hllcMixCnt - realMixCnt:  127
hllc cost time: 200 
********************begin r :{0.05},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 4000000  ,r : 0.05  setA size: 200000 , setB size: 200000
generate data cost time: 225 
realMixCnt: 10000 , mixRate:0.05
Map collection cost time: 71 
hllcMixCnt: 9751 , mixRatehllc:0.048755
mixRatehllc - mixRate:  -0.001245
hllcMixCnt - realMixCnt:  -249
hllc cost time: 273 
********************begin r :{0.1},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.1  setA size: 200000 , setB size: 200000
generate data cost time: 224 
realMixCnt: 19974 , mixRate:0.09987
Map collection cost time: 71 
hllcMixCnt: 19810 , mixRatehllc:0.09905
mixRatehllc - mixRate:  -0.000820
hllcMixCnt - realMixCnt:  -164
hllc cost time: 300 
********************begin r :{0.2},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.2  setA size: 200000 , setB size: 200000
generate data cost time: 243 
realMixCnt: 40093 , mixRate:0.200465
Map collection cost time: 82 
hllcMixCnt: 40549 , mixRatehllc:0.202745
mixRatehllc - mixRate:  0.002280
hllcMixCnt - realMixCnt:  456
hllc cost time: 297 
********************begin r :{0.5},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 400000  ,r : 0.5  setA size: 200000 , setB size: 200000
generate data cost time: 283 
realMixCnt: 99874 , mixRate:0.49937
Map collection cost time: 88 
hllcMixCnt: 99730 , mixRatehllc:0.49865
mixRatehllc - mixRate:  -0.000720
hllcMixCnt - realMixCnt:  -144
hllc cost time: 306 
********************begin r :{0.01},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 50000000  ,r : 0.01  setA size: 500000 , setB size: 500000
generate data cost time: 678 
realMixCnt: 5148 , mixRate:0.010296
Map collection cost time: 181 
hllcMixCnt: 3895 , mixRatehllc:0.00779
mixRatehllc - mixRate:  -0.002506
hllcMixCnt - realMixCnt:  -1253
hllc cost time: 673 
********************begin r :{0.05},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.05  setA size: 500000 , setB size: 500000
generate data cost time: 820 
realMixCnt: 25131 , mixRate:0.050262
Map collection cost time: 185 
hllcMixCnt: 24850 , mixRatehllc:0.0497
mixRatehllc - mixRate:  -0.000562
hllcMixCnt - realMixCnt:  -281
hllc cost time: 647 
********************begin r :{0.1},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 5000000  ,r : 0.1  setA size: 500000 , setB size: 500000
generate data cost time: 691 
realMixCnt: 49911 , mixRate:0.099822
Map collection cost time: 187 
hllcMixCnt: 50951 , mixRatehllc:0.101902
mixRatehllc - mixRate:  0.002080
hllcMixCnt - realMixCnt:  1040
hllc cost time: 690 
********************begin r :{0.2},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 2500000  ,r : 0.2  setA size: 500000 , setB size: 500000
generate data cost time: 888 
realMixCnt: 250250 , mixRate:0.5005
Map collection cost time: 212 
hllcMixCnt: 249358 , mixRatehllc:0.498716
mixRatehllc - mixRate:  -0.001784
hllcMixCnt - realMixCnt:  -892
hllc cost time: 608 
********************begin r :{0.5},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.5  setA size: 500000 , setB size: 500000
generate data cost time: 840 
realMixCnt: 249691 , mixRate:0.499382
Map collection cost time: 230 
hllcMixCnt: 249833 , mixRatehllc:0.499666
mixRatehllc - mixRate:  0.000284
hllcMixCnt - realMixCnt:  142
hllc cost time: 714 

Process finished with exit code 0



文章评论

程序员的一天:一寸光阴一寸金
程序员的一天:一寸光阴一寸金
我是如何打败拖延症的
我是如何打败拖延症的
一个程序员的时间管理
一个程序员的时间管理
中美印日四国程序员比较
中美印日四国程序员比较
程序猿的崛起——Growth Hacker
程序猿的崛起——Growth Hacker
漫画:程序员的工作
漫画:程序员的工作
初级 vs 高级开发者 哪个性价比更高?
初级 vs 高级开发者 哪个性价比更高?
Java 与 .NET 的平台发展之争
Java 与 .NET 的平台发展之争
“肮脏的”IT工作排行榜
“肮脏的”IT工作排行榜
如何区分一个程序员是“老手“还是“新手“?
如何区分一个程序员是“老手“还是“新手“?
10个帮程序员减压放松的网站
10个帮程序员减压放松的网站
那些争议最大的编程观点
那些争议最大的编程观点
团队中“技术大拿”并非越多越好
团队中“技术大拿”并非越多越好
那些性感的让人尖叫的程序员
那些性感的让人尖叫的程序员
每天工作4小时的程序员
每天工作4小时的程序员
60个开发者不容错过的免费资源库
60个开发者不容错过的免费资源库
当下全球最炙手可热的八位少年创业者
当下全球最炙手可热的八位少年创业者
程序员眼里IE浏览器是什么样的
程序员眼里IE浏览器是什么样的
老程序员的下场
老程序员的下场
我跳槽是因为他们的显示器更大
我跳槽是因为他们的显示器更大
程序员应该关注的一些事儿
程序员应该关注的一些事儿
旅行,写作,编程
旅行,写作,编程
亲爱的项目经理,我恨你
亲爱的项目经理,我恨你
2013年美国开发者薪资调查报告
2013年美国开发者薪资调查报告
什么才是优秀的用户界面设计
什么才是优秀的用户界面设计
程序员必看的十大电影
程序员必看的十大电影
我的丈夫是个程序员
我的丈夫是个程序员
程序员都该阅读的书
程序员都该阅读的书
聊聊HTTPS和SSL/TLS协议
聊聊HTTPS和SSL/TLS协议
程序员和编码员之间的区别
程序员和编码员之间的区别
Web开发者需具备的8个好习惯
Web开发者需具备的8个好习惯
Google伦敦新总部 犹如星级庄园
Google伦敦新总部 犹如星级庄园
程序员最害怕的5件事 你中招了吗?
程序员最害怕的5件事 你中招了吗?
不懂技术不要对懂技术的人说这很容易实现
不懂技术不要对懂技术的人说这很容易实现
5款最佳正则表达式编辑调试器
5款最佳正则表达式编辑调试器
十大编程算法助程序员走上高手之路
十大编程算法助程序员走上高手之路
如何成为一名黑客
如何成为一名黑客
老美怎么看待阿里赴美上市
老美怎么看待阿里赴美上市
程序员周末都喜欢做什么?
程序员周末都喜欢做什么?
程序员的鄙视链
程序员的鄙视链
看13位CEO、创始人和高管如何提高工作效率
看13位CEO、创始人和高管如何提高工作效率
10个调试和排错的小建议
10个调试和排错的小建议
 程序员的样子
程序员的样子
要嫁就嫁程序猿—钱多话少死的早
要嫁就嫁程序猿—钱多话少死的早
编程语言是女人
编程语言是女人
“懒”出效率是程序员的美德
“懒”出效率是程序员的美德
Web开发人员为什么越来越懒了?
Web开发人员为什么越来越懒了?
鲜为人知的编程真相
鲜为人知的编程真相
写给自己也写给你 自己到底该何去何从
写给自己也写给你 自己到底该何去何从
软件开发 · 程序错误 · 异常 · Exception
Copyright © 2009-2015 MyException 版权所有