Tutorial on data mining for java.net
Source code name: "datamining_tutorial.py"
Programming language: Python
Topic: Data mining/regression
DMelt Version 1.4. Last modified: 06/14/1970. License: Free
https://datamelt.org/code/cache/datamining_tutorial_8580.py
To run this script using the DataMelt IDE, copy the above URL link to the menu [File]→[Read script from URL] of the DMelt IDE.


# Data Mining using DMelt Tutorial. 
# S.Chekanov. April 2011. Updated for DMelt in June 2016 
 
from jhplot import *
from jhplot.stat  import LinReg
from  java.awt import Color

# read data from external file
pn=PND('data','http://datamelt.org/examples/data/pnd.d')
print pn.toString()

p0=pn.getP0D(1)    # extract 2nd column and put to a 1D array
print p0.getStat() # print statistics
print p0.variance()
h1=p0.getH1D(10)   # make a histogram with 10 bins


p1=pn.getP1D(1,2)   # extract column 2 and 3 
c1=HPlot("Plot",600,400,2,1)
c1.visible()
c1.cd(1,1)          # go to the first drawing region 
c1.setAutoRange()
c1.draw(h1)


c1.cd(2,1)       # go to second drawing region
c1.setAutoRange()
r = LinReg(p1)
print "Intercept=",r.getIntercept(), "+/-",r.getInterceptError() 
print "Slope=",r.getSlope(),"+/-",r.getSlopeError() 

# create a string with a*x+b function
func='%4.2f*x+%4.2f' % (r.getSlope(),r.getIntercept())
f1=F1D( func, p1.getMin(0), p1.getMax(0))    # define a function in the range of the data

p=r.getPredictionBand(Color.green) # calculate the prediction band
p.setLegend(False)                 # do not show the legend
p.setErrColor(Color.green)         # color for error bars
c1.draw(p)
c1.draw(p1)                        # redraw data and the function 
c1.draw(f1)                                    
c1.export("tutorial_dmin1.eps")    # make image file (EPS) 


# Two 3D data sets taken from the table columns
cols_123 = pn.getP2D(0, 1, 2)        # columns 1, 2, 3
cols_134 = pn.getP2D(0, 2, 3)        # columns 1, 3, 4

# 3D canvas with two drawing regions
c2 = HPlot3D("Plot", 600, 400, 2, 1)
c2.visible()
c2.setAutoRange()

# First drawing region: white box
c2.cd(1, 1)
white = Color.white
c2.setBoxColor(white)
c2.draw(cols_123)

# Second drawing region: custom RGB box color
c2.cd(2, 1)
box_rgb = Color(200, 210, 210)
c2.setBoxColor(box_rgb)
c2.setAutoRange()
c2.draw(cols_134)

c2.export("tutorial_dmin2.eps")      # write the image file (EPS)