IBM Dublin Research Lab
# Set up worker processes for parallel model evaluation.
# FIX: `nprocs`, `addprocs`, and `@everywhere` live in the Distributed
# stdlib on Julia >= 0.7, so it must be loaded explicitly.
using Distributed
nprocs() == 1 && addprocs()
@everywhere import CombineML.Util
@everywhere import CombineML.Transformers
@everywhere import RDatasets
# Short module aliases, defined on every worker.
@everywhere CU = CombineML.Util
@everywhere CT = CombineML.Transformers
@everywhere RD = RDatasets
# Scikit-learn wrapper that provides access to scikit learners.
# BUG FIX: the learner name was wrapped in typographic quotes (“ ”), which
# is invalid Julia string syntax; replaced with ASCII double quotes.
@everywhere sk_gblearner = CT.SKLLearner(
    Dict(
        :output => :class,
        :learner => "GradientBoostingClassifier",
        :impl_options => Dict()
    )
)
# Meta-learner that evaluates each candidate and keeps the best-scoring one.
@everywhere best_learner = CT.BestLearner(Dict(
    :output => :class,
    :score_type => Real,
    :learner_options_grid => nothing,
    :learners => [sk_gblearner, CT.PrunedTree(), CT.RandomForest()],
))
# Majority-vote ensemble over five base learners.
@everywhere vote_learner = CT.VoteEnsemble(Dict(
    :output => :class,
    :learners => [
        CT.PrunedTree(),
        CT.DecisionStumpAdaboost(),
        CT.RandomForest(),
        sk_gblearner,
        best_learner,
    ],
))
# Stacked ensemble: a RandomForest stacker learns to combine the
# predictions of the five base learners.
@everywhere stack_learner = CT.StackEnsemble(Dict(
    :stacker => CT.RandomForest(),
    :learners => [
        CT.PrunedTree(),
        CT.RandomForest(),
        CT.DecisionStumpAdaboost(),
        best_learner,
        sk_gblearner,
    ],
))
# --- Top-level demo: preprocessing + learning pipeline on the iris data. ---
# BUG FIX: the original referenced an undefined variable `learner`; bind it
# explicitly before building the pipeline.
learner = vote_learner  # NOTE(review): choose whichever learner to demo here
pipeline = CT.Pipeline(
    Dict(
        :transformers => [
            CT.OneHotEncoder(),   # Encodes nominal features into numeric
            CT.Imputer(),         # Imputes NA values
            CT.StandardScaler(),  # Standardizes features
            learner               # Final prediction stage
        ]
    )
)
# BUG FIX: typographic quotes replaced with ASCII quotes in string literals.
dataset = RD.dataset("datasets", "iris")
instances = Array(dataset[:, 1:(end - 1)])  # feature columns
labels = Array(dataset[:, end])             # class column (last)
# Hold out 30% of the rows for testing.
(train_ind, test_ind) = CU.holdout(size(instances, 1), 0.3)
CT.fit!(pipeline, instances[train_ind, :], labels[train_ind])
predictions = CT.transform!(pipeline, instances[test_ind, :])
result = CU.score(:accuracy, labels[test_ind], predictions)
# Train and evaluate `learner` on the iris dataset (runs on any worker).
# Returns the holdout accuracy (a percentage) for one random 70/30 split.
# BUG FIX: typographic quotes replaced with ASCII quotes in string literals.
@everywhere function predict(learner)
    dataset = RD.dataset("datasets", "iris")
    instances = Array(dataset[:, 1:(end - 1)])  # feature columns
    labels = Array(dataset[:, end])             # class column (last)
    # Hold out 30% of the rows for testing.
    (train_ind, test_ind) = CU.holdout(size(instances, 1), 0.3)
    pipeline = CT.Pipeline(
        Dict(
            :transformers => [
                CT.OneHotEncoder(),   # Encodes nominal features into numeric
                CT.Imputer(),         # Imputes NA values
                CT.StandardScaler(),  # Standardizes features
                CT.PCA(),             # Reduces dimensionality
                learner               # Final prediction stage
            ]
        )
    )
    CT.fit!(pipeline, instances[train_ind, :], labels[train_ind])
    predictions = CT.transform!(pipeline, instances[test_ind, :])
    result = CU.score(:accuracy, labels[test_ind], predictions)
    return result
end
using DataFrames

# Evaluate every learner `trials` times in parallel and return a DataFrame
# with one row per model: mean accuracy, std of accuracy, and trial count,
# sorted by mean accuracy (best first).
# BUG FIXES: typographic quotes in the println call, and the broken pipe
# `| > DataFrame |` replaced with the valid pipe operator `|> DataFrame`.
function main(trials)
    learners = Dict(
        :gradientboosting => sk_gblearner,
        :randomforest => CT.RandomForest(),
        :adaboost => CT.DecisionStumpAdaboost(),
        :votelearner => vote_learner,
        :bestlearner => best_learner,
        :stacklearner => stack_learner,
    )
    models = collect(keys(learners))
    # Outer loop over models and inner loop over trials both run in parallel;
    # (vcat) stacks the per-iteration results into one table / vector.
    ctable = @parallel (vcat) for model in models
        acc = @parallel (vcat) for i = 1:trials
            res = predict(learners[model])
            println(model, " => ", round(res))
            res
        end
        [model round(mean(acc)) round(std(acc)) length(acc)]
    end
    # Sort rows by mean accuracy, descending, then convert to a DataFrame.
    sorted = sortrows(ctable, by = (x) -> x[2], rev = true) |> DataFrame
    rename!(sorted, Dict(:x1 => :model, :x2 => :mean_acc,
                         :x3 => :std_acc, :x4 => :trials))
    return sorted
end
# Number of evaluation repetitions per learner.
const trials = 5
# Run the full benchmark and display the resulting comparison table.
res = main(trials)
@show res
Note: the learners have only been tested on instances with numeric features.
Inconsistencies may result from using nominal features directly, without first applying a numeric transformation (i.e., OneHotEncoder).
I am a research scientist at the IBM Dublin Research Lab working in the areas of analytics, datamining, machine learning, and AI. I finished my Doctor of Engineering degree from the Toyohashi University of Technology in Japan (2005). I have a Master’s degree in Computer Science majoring in Artificial Intelligence (Ateneo de Manila University, 1995) and a Bachelor’s degree in Applied Mathematics (University of the Philippines in the Visayas, 1991). I used to work as a technical staff for two years in the Neuroinformatics Lab of RIKEN Brain Science Institute, Japan before finishing my DEng degree. I spent a total of 4 years as a Postdoctoral Fellow in the National University of Singapore and the National Neuroscience Institute working on diverse topics such as context-aware reasoning, datamining models for activity recognition in smarthome environment, detecting biomarkers for Parkinson’s Disease by image processing of fMRI and DTI images, and automated diagnosis of movement disorder for intelligent healthcare. Moreover, I held an Asst. Professorship for a total of 6 years in the University of the Philippines and Ateneo de Manila University. My research interests include datamining, optimization, development of intelligent agents using machine learning and evolutionary computation, neuroinformatics, and biomedical engineering.