인류의 복지와 편익을 위한 인프라 건설을 주도하는 토목공학과
제목
(2학년) 빅데이터마이닝_기말시험 대비(박영훈 교수)
작성일
2025.12.09
작성자
부천대학교 토목공학과
# Key code (original page label: 주요코딩)
#
# Exam-review script with four independent exercises:
#   Problem 2 - association rules (arules)
#   Problem 3 - linear / quadratic discriminant analysis (MASS)
#   Problem 4 - neural-network classifier (nnet + caret)
#   Problem 5 - C5.0 decision tree (C50 + caret)
# Each package is installed only when it is not already available, so the
# script can be re-run without re-downloading packages every time.
# Input files are read from the current working directory.

# Problem 2 ----
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
library(arules)

tr <- read.delim(
  "dataTransactions.tab",
  stringsAsFactors = FALSE,
  fileEncoding = "euc-kr"
)
head(tr, n = 20)

# Exclude the two corners that are not part of the analysis.
tr.filter <- subset(tr, subset = !(corner %in% c("일반식품", "가구")))
head(tr.filter, n = 20)

# Drop duplicate rows, then group the corners by customer id to build
# one transaction per customer.
tr.filter.uniq <- unique(tr.filter)
trans <- as(
  split(tr.filter.uniq$corner, tr.filter.uniq$custid),
  "transactions"
)

rules <- apriori(trans, parameter = list(support = 0.2, confidence = 0.8))
summary(rules)

# Keep rules whose right-hand side contains the target corner and whose
# lift exceeds 1.5, then list them ordered by confidence.
rules.target <- subset(rules, rhs %in% "스포츠" & lift > 1.5)
inspect(sort(rules.target, by = "confidence"))

# Problem 3 ----
turkey <- read.csv("turkey.csv")
head(turkey, n = 20)

# Remove rows that contain missing values before fitting.
turkey <- na.omit(turkey)
head(turkey, n = 20)

if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
library(MASS)

# Linear discriminant analysis and prediction for two new observations.
model1 <- lda(TYPE ~ HUM + RAD, data = turkey)
model1
predict(model1, data.frame("HUM" = c(150, 150), "RAD" = c(135, 150)))

# Quadratic discriminant analysis on the same formula and new observations.
model2 <- qda(TYPE ~ HUM + RAD, data = turkey)
model2
predict(model2, data.frame("HUM" = c(150, 150), "RAD" = c(135, 150)))

# Problem 4 ----
if (!requireNamespace("nnet", quietly = TRUE)) {
  install.packages("nnet")
}
library(nnet)

cb <- read.delim(
  "Hshopping3.txt",
  stringsAsFactors = FALSE,
  fileEncoding = "euc-kr"
)
head(cb)

# Categorical columns must be factors for the classifiers below.
cb$sex <- factor(cb$sex)
cb$corner <- factor(cb$corner)
cb$refund <- factor(cb$refund)
str(cb)

if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret", dependencies = TRUE)
}
library(caret)

# 60% training / 40% test split on the response.
set.seed(1)
inTrain <- createDataPartition(y = cb$refund, p = 0.6, list = FALSE)
cb.train <- cb[inTrain, ]
cb.test <- cb[-inTrain, ]

# Seed fixed right before fitting so the fit is repeatable.
set.seed(1234567)
nn_model <- nnet(
  refund ~ age + money + corner,
  data = cb.train,
  size = 7,
  maxit = 1000
)

if (!requireNamespace("NeuralNetTools", quietly = TRUE)) {
  install.packages("NeuralNetTools")
}
library(NeuralNetTools)

# Variable importance of the fitted network.
garson(nn_model)

# Pin the factor levels to those of the reference so confusionMatrix() still
# works when the network predicts only a subset of the classes.
predicted <- factor(
  predict(nn_model, newdata = cb.test, type = "class"),
  levels = levels(cb.test$refund)
)
confusionMatrix(predicted, cb.test$refund)

# Problem 5 ----
# Reuses `cb` and the caret package loaded in Problem 4.
set.seed(5)
inTrain <- createDataPartition(y = cb$refund, p = 0.6, list = FALSE)
cb.train <- cb[inTrain, ]
cb.test <- cb[-inTrain, ]
dim(cb.train)
dim(cb.test)

if (!requireNamespace("C50", quietly = TRUE)) {
  install.packages("C50")
}
library(C50)

# winnow = TRUE would apply feature selection (automatically removing some
# variables, taking multicollinearity, importance, etc. into account).
c5_options <- C5.0Control(winnow = FALSE, noGlobalPruning = FALSE)
c5_model <- C5.0(
  refund ~ age + money + corner,
  data = cb.train,
  control = c5_options,
  rules = FALSE
)
summary(c5_model)
# plot(c5_model)

# Store the predicted class and the class probabilities on the test set.
cb.test$c5_pred <- predict(c5_model, cb.test, type = "class")
cb.test$c5_pred_prob <- predict(c5_model, cb.test, type = "prob")
head(cb.test)
confusionMatrix(cb.test$c5_pred, cb.test$refund)