인류의 복지와 편익을 위한 인프라 건설을 주도하는 토목공학과
제목
(2학년)빅데이터마이닝_기말고사 예상문제 및 주요 코딩(인공지능등)(박영훈 교수)
작성일
2022.11.29
작성자
부천대학교 토목과
# Final exam _ Problem 2: association rule mining with arules
# Guard installation so the script does not reinstall on every run.
if (!requireNamespace("arules", quietly = TRUE)) install.packages("arules")
library(arules)

# Transaction data: one row per (custid, corner) purchase record.
tr <- read.delim("dataTransactions.tab", stringsAsFactors = FALSE)
head(tr, n = 20)

# Exclude selected store corners before building transactions.
# tr.filter <- subset(tr, subset = !(corner %in% c("일반식품")))
# tr.filter <- subset(tr, subset = !(corner %in% c("신발")))
tr.filter <- subset(tr, subset = !(corner %in% c("일반식품", "가구")))
head(tr.filter, n = 20)

# Drop duplicated (custid, corner) rows so each corner counts once per customer.
tr.filter.uniq <- unique(tr.filter)

# Regroup corners by custid into an arules "transactions" object.
trans <- as(split(tr.filter.uniq$corner, tr.filter.uniq$custid), "transactions")
trans

# Mine rules at the chosen minimum support / confidence thresholds.
# rules <- apriori(trans, parameter = list(support = 0.15, confidence = 0.75))
rules <- apriori(trans, parameter = list(support = 0.2, confidence = 0.8))
summary(rules)
inspect(rules)

# Keep only rules predicting the target corner with lift > 1.4,
# sorted by confidence for reporting.
# rules.target <- subset(rules, rhs %in% "가구" & lift > 1.4)
rules.target <- subset(rules, rhs %in% "스포츠" & lift > 1.4)
inspect(sort(rules.target, by = "confidence"))

# Final exam _ Problem 3: linear / quadratic discriminant analysis (MASS)
turkey <- read.csv("turkey.csv")
head(turkey, n = 20)
turkey <- na.omit(turkey)  # drop rows with missing measurements
head(turkey, n = 20)

if (!requireNamespace("MASS", quietly = TRUE)) install.packages("MASS")
library(MASS)

# LDA: classify TYPE from the HUM and RAD measurement columns.
# model_lda <- lda(TYPE ~ HUM + ULN, data = turkey)
model_lda <- lda(TYPE ~ HUM + RAD, data = turkey)
model_lda
predict(model_lda, data.frame("HUM" = c(150, 150), "RAD" = c(135, 150)))

# QDA on the same predictors for comparison.
model_qda <- qda(TYPE ~ HUM + RAD, data = turkey)
model_qda
predict(model_qda, data.frame("HUM" = c(150, 150), "RAD" = c(135, 150)))

# Final exam _ Problem 4: single-hidden-layer neural network (nnet)
if (!requireNamespace("nnet", quietly = TRUE)) install.packages("nnet")
library(nnet)

cb <- read.delim("Hshopping2.txt", stringsAsFactors = FALSE)
head(cb, n = 20)
str(cb)

# Convert categorical columns to factors for modeling.
cb$성별 <- as.factor(cb$성별)
cb$출연자 <- as.factor(cb$출연자)
cb$반품여부 <- as.factor(cb$반품여부)
str(cb)

if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret", dependencies = TRUE)
}
library(caret)

# Stratified train/test split on the response column (반품여부).
set.seed(1)
# inTrain <- createDataPartition(y = cb$반품여부, p = 0.7, list = FALSE)
inTrain <- createDataPartition(y = cb$반품여부, p = 0.6, list = FALSE)
cb.train <- cb[inTrain, ]
cb.test <- cb[-inTrain, ]

# Seed fixed so the random weight initialization is reproducible.
set.seed(1234567)
# nn_model <- nnet(반품여부 ~ 나이 + 성별 + 출연자, data = cb.train,
#                  size = 5, maxit = 100)
nn_model <- nnet(반품여부 ~ 나이 + 구매금액 + 출연자, data = cb.train,
                 size = 7, maxit = 1000)

if (!requireNamespace("NeuralNetTools", quietly = TRUE)) {
  install.packages("NeuralNetTools")
}
library(NeuralNetTools)
garson(nn_model)  # variable-importance plot (Garson's algorithm)

# Evaluate predicted classes against the held-out test labels.
predicted <- as.factor(predict(nn_model, newdata = cb.test, type = "class"))
confusionMatrix(predicted, cb.test$반품여부)

# Final exam _ Problem 5 follows.
# Final exam _ Problem 5: C5.0 decision tree with caret evaluation
# Guard installation so the script does not reinstall on every run.
if (!requireNamespace("caret", quietly = TRUE)) install.packages("caret")
if (!requireNamespace("C50", quietly = TRUE)) install.packages("C50")
if (!requireNamespace("ROCR", quietly = TRUE)) install.packages("ROCR")
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071", dependencies = TRUE)
}
library(caret)
library(C50)
library(ROCR)
library(e1071)

cb <- read.delim("Hshopping2.txt", stringsAsFactors = FALSE)
head(cb)
str(cb)

# Convert categorical columns to factors for modeling.
cb$반품여부 <- factor(cb$반품여부)
cb$성별 <- factor(cb$성별)
cb$출연자 <- factor(cb$출연자)

# Stratified train/test split on the response column (반품여부).
set.seed(5)
# inTrain <- createDataPartition(y = cb$반품여부, p = 0.75, list = FALSE)
inTrain <- createDataPartition(y = cb$반품여부, p = 0.6, list = FALSE)
cb.train <- cb[inTrain, ]
cb.test <- cb[-inTrain, ]
dim(cb.train); dim(cb.test)

# winnow = TRUE would enable C5.0's feature selection (automatically drops
# some predictors based on collinearity/importance); disabled here.
# c5_options <- C5.0Control(winnow = TRUE, noGlobalPruning = FALSE)
c5_options <- C5.0Control(winnow = FALSE, noGlobalPruning = FALSE)

# Fit the tree; rules = FALSE keeps the tree (not rule-set) representation.
# c5_model <- C5.0(반품여부 ~ 나이 + 성별 + 출연자, data = cb.train,
#                  control = c5_options, rules = FALSE)
c5_model <- C5.0(반품여부 ~ 나이 + 구매금액 + 출연자, data = cb.train,
                 control = c5_options, rules = FALSE)
summary(c5_model)
plot(c5_model)

# Predicted class labels and class probabilities on the test set.
cb.test$c5_pred <- predict(c5_model, cb.test, type = "class")
cb.test$c5_pred_prob <- predict(c5_model, cb.test, type = "prob")
head(cb.test)

# Bug fix: the original ended with a stray truncated call
# "trix(cb.test$c5_pred, cb.test$반품여부)" — a duplicated tail of the
# confusionMatrix call below — which would error (`trix` is undefined).
confusionMatrix(cb.test$c5_pred, cb.test$반품여부)