Machine Learning/kdd r

From Noisebridge
Jump to navigation Jump to search
# Load the tab-separated training sample (per its file name, a random sample
# of 1000 students) and print its structure for a quick look at the columns.
algebra <- read.delim(
  "algebra_2008_2009_train.txt_sample_1000_random_students.txt"
)
str(algebra)

# Load the tab-separated test set and print its structure as well.
algebra_test <- read.delim("algebra_2008_2009_test.txt")
str(algebra_test)

# Attempt 1: random forest classifier on the first-attempt outcome.
# Marked as failing by the original author ("Random Forest Fail").
# NOTE(review): the cause is not recorded here; possibly the categorical
# predictors have too many levels for randomForest -- confirm.
library(randomForest)
my_model <- randomForest(
  as.factor(Correct.First.Attempt) ~
    Step.Name + Anon.Student.Id + Problem.Hierarchy + Problem.Name,
  data = algebra
)

# Attempt 2: logistic regression on the same predictors, also marked as
# failing by the original author ("glm fail"). This assignment replaces the
# random forest stored in my_model above.
my_model <- glm(
  Correct.First.Attempt ~
    Step.Name + Anon.Student.Id + Problem.Hierarchy + Problem.Name,
  family = "binomial",
  data = algebra
)

# Predict on the test set. type = "response" makes predict() return
# probabilities for the binomial glm; the default for a glm is the linear
# predictor (log-odds), which is not a usable forecast of a 0/1 outcome.
forecasts <- predict(my_model, newdata = algebra_test, type = "response")

# Pair each test row identifier with its prediction.
forecast_frame <- data.frame(
  Row = algebra_test$Row,
  Correct.First.Attempt = forecasts
)

# Write the forecasts as a tab-separated file with a header line.
# - write.table() is the base R writer (there is no write_table()).
# - Its header switch is col.names; it has no `header` argument.
# - row.names = FALSE keeps R's automatic row labels out of the file, which
#   would otherwise add an unnamed first column and misalign the header.
# NOTE(review): "algenra" in the file name looks like a typo for "algebra";
# left unchanged so the output path stays the same -- confirm before renaming.
write.table(
  forecast_frame,
  file = "algenra_2008_2009_submission_r.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE
)