数据获取的渠道有很多,例如企业内部系统、机台、网站等。本文使用 R 的 rvest 包爬取网络数据。
code:
library(rvest)
library(stringr)
library(tidyr)
library(dplyr)
# Search-result URL for the first results page (curPage=0).
# The query parameter is spelled `degradeFlag`; it had been mangled into
# "°radeFlag" because "&deg" was decoded as the HTML entity for the degree
# sign, which corrupted the query string sent to the server.
url <- "https://www.liepin.com/zhaopin/?ckid=cbff44f512ec6739&fromSearchBtn=2&degradeFlag=0&init=-1&headckid=cbff44f512ec6739&curPage=0"
# Download and parse the page once; the extraction steps below read `page`.
page <- read_html(url)
# Job titles: trimmed text of every <h3> inside a result's job-info box,
# stored in the one-column data frame `p` (column "position").
position <- html_text(
  html_nodes(page, "ul.sojob-list div.sojob-item-main div.job-info h3"),
  trim = TRUE
)
p <- data.frame(position = position)
# Job locations: trimmed text of every <a class="area"> inside a result's
# job-info box, stored in the one-column data frame `a` (column "area").
area <- html_text(
  html_nodes(page, "ul.sojob-list div.sojob-item-main div.job-info a.area"),
  trim = TRUE
)
a <- data.frame(area = area)
# Condition <span>s of every job on the page, scraped as one flat character
# vector. The script treats them as consecutive triples:
# salary, education, experience.
experience <- page %>%
  html_nodes("ul.sojob-list div.sojob-item-main div.job-info p.condition span") %>%
  html_text(trim = TRUE)

# A length that is not a multiple of 3 would silently shift values into the
# wrong columns, so fail loudly instead.
if (length(experience) %% 3 != 0) {
  stop(
    "Expected 3 condition fields per job, got ", length(experience),
    " values in total.",
    call. = FALSE
  )
}

# Reshape row-wise: each consecutive triple becomes one row. This replaces
# the hand-written fill loop, which only ever wrote row 1 (`j<j+1` compared
# instead of assigning), always started reading at element 1 regardless of
# the column, and referenced an undefined object `e`.
# The column name "experence" keeps its original spelling so any later code
# that refers to it keeps working.
dt <- matrix(
  experience,
  ncol = 3,
  byrow = TRUE,
  dimnames = list(NULL, c("salary", "education", "experence"))
)
# Loop over result pages 1-10 (page 0 was scraped above) and append their
# rows to `p` (positions), `a` (areas) and `dt` (salary/education/experience).
#
# Fixes relative to the original loop:
#   * the page number `n` is appended to the URL (it used the literal "a");
#   * the URL parameter `degradeFlag` is restored (it was mangled to
#     "°radeFlag" by an HTML-entity decode of "&deg");
#   * areas are built from `area`, not from `position`;
#   * the per-page matrix `dt1` is actually filled (the old code read an
#     undefined `e` and wrote into `dt`);
#   * the stray closing brace after `work_info` is removed.
base_url <- "https://www.liepin.com/zhaopin/?ckid=cbff44f512ec6739&fromSearchBtn=2&degradeFlag=0&init=-1&headckid=cbff44f512ec6739&curPage="
job_info_css <- "ul.sojob-list div.sojob-item-main div.job-info"
page_numbers <- 1:10

# Collect per-page results in preallocated lists and bind once at the end
# instead of growing the objects inside the loop.
p_pages <- vector("list", length(page_numbers))
a_pages <- vector("list", length(page_numbers))
dt_pages <- vector("list", length(page_numbers))

for (k in seq_along(page_numbers)) {
  n <- page_numbers[k]
  url <- str_c(base_url, n)
  page <- read_html(url)

  # position
  position <- page %>%
    html_nodes(str_c(job_info_css, " h3")) %>%
    html_text(trim = TRUE)
  p_pages[[k]] <- data.frame(position = position)

  # area
  area <- page %>%
    html_nodes(str_c(job_info_css, " a.area")) %>%
    html_text(trim = TRUE)
  a_pages[[k]] <- data.frame(area = area)

  # experience: flat vector of (salary, education, experience) triples
  experience <- page %>%
    html_nodes(str_c(job_info_css, " p.condition span")) %>%
    html_text(trim = TRUE)
  if (length(experience) %% 3 != 0) {
    stop(
      "Page ", n, ": expected 3 condition fields per job, got ",
      length(experience), " values in total.",
      call. = FALSE
    )
  }
  # Column name "experence" keeps the spelling used for `dt` so the
  # matrices line up and downstream references keep working.
  dt_pages[[k]] <- matrix(
    experience,
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c("salary", "education", "experence"))
  )
}

# Prepend the first-page results that already live in `p`, `a` and `dt`.
p <- do.call(rbind, c(list(p), p_pages))
a <- do.call(rbind, c(list(a), a_pages))
dt <- do.call(rbind, c(list(dt), dt_pages))

# One row per job posting; errors if the three pieces disagree on row count.
work_info <- data.frame(p, a, dt)
# 数据可视化:
略...
声明:我要去上班所有作品(图文、音视频)均由用户自行上传分享,仅供网友学习交流,版权归原作者阿国所有,原文出处。若您的权利被侵害,请联系删除。
本文标题:(猎聘网招聘)(猎聘网招聘官方网站)
本文链接:https://www.51qsb.cn/article/m8obe.html