
  This example is taken from 《機器學習-算法原理與編程實踐》 (Machine Learning: Algorithm Principles and Programming Practice). It shows that, given the distribution trend of a dataset, we can use the method of least squares to fit the regression line (the trend line of the data distribution), although the method as applied here only covers the linear model Y = aX + b. The example also demonstrates that least squares can serve as a prediction algorithm for linearly distributed data: it finds a reasonable linear function for the dataset, which can then be used to predict the trend of the distribution.
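For a set of sample pairs (x_i, y_i), the closed-form least-squares estimates of the slope a and intercept b in Y = aX + b are

a = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2}, \qquad b = \bar{y} - a\,\bar{x}

where \bar{x} and \bar{y} are the sample means; this is exactly what the script below computes with dx, dy, sumXY and sqX.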

 

[Dataset] Input File = regdataset.txt

0.635975 4.093119

0.552438 3.804358

0.855922 4.456531

0.083386 3.187049

0.975802 4.506176

0.181269 3.171914

0.129156 3.053996

0.605648 3.974659

0.301625 3.542525

0.698805 4.234199

0.226419 3.405937

0.519290 3.932469

0.354424 3.514051

0.118380 3.105317

0.512811 3.843351

0.236795 3.576074

0.353509 3.544471

0.481447 3.934625

0.060509 3.228226

0.174090 3.300232

0.806818 4.331785

0.531462 3.908166

0.853167 4.386918

0.304804 3.617260

0.612021 4.082411

0.620880 3.949470

0.580245 3.984041

0.742443 4.251907

0.110770 3.115214

0.742687 4.234319

0.574390 3.947544

0.986378 4.532519

0.294867 3.510392

0.472125 3.927832

0.872321 4.631825

0.843537 4.482263

0.864577 4.487656

0.341874 3.486371

0.097980 3.137514

0.757874 4.212660

0.877656 4.506268

0.457993 3.800973

0.475341 3.975979

0.848391 4.494447

0.746059 4.244715

0.153462 3.019251

0.694256 4.277945

0.498712 3.812414

0.023580 3.116973

0.976826 4.617363

0.624004 4.005158

0.472220 3.874188

0.390551 3.630228

0.021349 3.145849

0.173488 3.192618

0.971028 4.540226

0.595302 3.835879

0.097638 3.141948

0.745972 4.323316

0.676390 4.204829

0.488949 3.946710

0.982873 4.666332

0.296060 3.482348

0.228008 3.451286

0.671059 4.186388

0.379419 3.595223

0.285170 3.534446

0.236314 3.420891

0.629803 4.115553

0.770272 4.257463

0.493052 3.934798

0.631592 4.154963

0.965676 4.587470

0.598675 3.944766

0.351997 3.480517

0.342001 3.481382

0.661424 4.253286

0.140912 3.131670

0.373574 3.527099

0.223166 3.378051

0.908785 4.578960

0.915102 4.551773

0.410940 3.634259

0.754921 4.167016

0.764453 4.217570

0.101534 3.237201

0.780368 4.353163

0.819868 4.342184

0.173990 3.236950

0.330472 3.509404

0.162656 3.242535

0.476283 3.907937

0.636391 4.108455

0.758737 4.181959

0.778372 4.251103

0.936287 4.538462

0.510904 3.848193

0.515737 3.974757

0.437823 3.708323

0.828607 4.385210

0.556100 3.927788

0.038209 3.187881

0.321993 3.444542

0.067288 3.199263

0.774989 4.285745

0.566077 3.878557

0.796314 4.155745

0.746600 4.197772

0.360778 3.524928

0.397321 3.525692

0.062142 3.211318

0.379250 3.570495

0.248238 3.462431

0.682561 4.206177

0.355393 3.562322

0.889051 4.595215

0.733806 4.182694

0.153949 3.320695

0.036104 3.122670

0.388577 3.541312

0.274481 3.502135

0.319401 3.537559

0.431653 3.712609

0.960398 4.504875

0.083660 3.262164

0.122098 3.105583

0.415299 3.742634

0.854192 4.566589

0.925574 4.630884

0.109306 3.190539

0.805161 4.289105

0.344474 3.406602

0.769116 4.251899

0.182003 3.183214

0.225972 3.342508

0.413088 3.747926

0.964444 4.499998

0.203334 3.350089

0.285574 3.539554

0.850209 4.443465

0.061561 3.290370

0.426935 3.733302

0.389376 3.614803

0.096918 3.175132

0.148938 3.164284

0.893738 4.619629

0.195527 3.426648

0.407248 3.670722

0.224357 3.412571

0.045963 3.110330

0.944647 4.647928

0.756552 4.164515

0.432098 3.730603

0.990511 4.609868

0.649699 4.094111

0.584879 3.907636

0.785934 4.240814

0.029945 3.106915

0.075747 3.201181

0.408408 3.872302

0.583851 3.860890

0.497759 3.884108

0.421301 3.696816

0.140320 3.114540

0.546465 3.791233

0.843181 4.443487

0.295390 3.535337

0.825059 4.417975

0.946343 4.742471

0.350404 3.470964

0.042787 3.113381

0.352487 3.594600

0.590736 3.914875

0.120748 3.108492

0.143140 3.152725

0.511926 3.994118

0.496358 3.933417

0.382802 3.510829

0.252464 3.498402

0.845894 4.460441

0.132023 3.245277

0.442301 3.771067

0.266889 3.434771

0.008575 2.999612

0.897632 4.454221

0.533171 3.985348

0.285243 3.557982

0.377258 3.625972

0.486995 3.922226

0.305993 3.547421

0.277528 3.580944

0.750899 4.268081

0.694756 4.278096

0.870158 4.517640

0.276457 3.555461

0.017761 3.055026

0.802046 4.354819

0.559275 3.894387

0.941305 4.597773

0.856877 4.523616

 

[python] minsqrt.py

from numpy import array, mean, power, vdot
import matplotlib.pyplot as plt

def loadDataSet(filename):
    """Read one tab-separated (x, y) pair per line, e.g. "0.635975<TAB>4.093119"."""
    X = []
    Y = []
    with open(filename) as fr:
        for line in fr.readlines():
            curLine = line.strip().split('\t')  # e.g. ['0.635975', '4.093119']
            X.append(float(curLine[0]))         # x value
            Y.append(float(curLine[1]))         # y value
    return X, Y

def plotscatter(Xmat, Ymat, a, b, plt):
    # Scatter the raw points, then draw the fitted line Y = a*X + b.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(Xmat, Ymat, c='blue', marker='o')
    Xmat.sort()
    Yhat = [a * float(xi) + b for xi in Xmat]   # predicted values on the fitted line

    plt.plot(Xmat, Yhat, 'r')
    plt.show()

    return Yhat

if __name__ == "__main__":

    Xmat, Ymat = loadDataSet('regdataset.txt')
    meanX = mean(Xmat)
    meanY = mean(Ymat)
    dx = array(Xmat) - meanX    # deviations of x from its mean
    dy = array(Ymat) - meanY    # deviations of y from its mean

    sumXY = vdot(dx, dy)        # sum of (x - meanX)*(y - meanY)
    sqX = sum(power(dx, 2))     # sum of (x - meanX)^2

    a = sumXY / sqX             # slope
    b = meanY - a * meanX       # intercept

    print("%f,%f" % (a, b))

    plotscatter(Xmat, Ymat, a, b, plt)
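
As a quick cross-check (a minimal sketch, not part of the book's example, assuming NumPy is installed and regdataset.txt is in the working directory), numpy.polyfit with degree 1 fits the same straight line and should report essentially the same slope and intercept:

import numpy as np

data = np.loadtxt('regdataset.txt')                        # two whitespace-separated columns: x, y
slope, intercept = np.polyfit(data[:, 0], data[:, 1], 1)   # degree-1 polynomial fit = linear least squares
print("%f,%f" % (slope, intercept))                        # should roughly match the a, b from minsqrt.py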

 

 

[Result]

PS D:\Lab\ScriptLab\ML> python minsqrt.py

1.668743,3.007722
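
That is, the fitted regression line is approximately Y = 1.668743 * X + 3.007722.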

Monday, September 17, 2018
