వాడుకరి:Arjunaraoc/getmainnsbook.R
స్వరూపం
## Get main-namespace book pages (pages that have subpages) from a
## "top 1000 page views" JSON file. Last updated 20180503.
## Example argument:
##   url <- "https://wikimedia.org/api/rest_v1/metrics/pageviews/top/te.wikisource.org/all-access/2018/04/all-days"

#' Summarise page views per book (main page plus its subpages)
#'
#' Reads a page-views "top" JSON document, keeps the titles that contain no
#' ":" (treated here as main-namespace pages), groups every "Book/Subpage"
#' title under the text before its first "/", and reports per-book totals.
#'
#' @param pvfile Anything `jsonlite::fromJSON()` accepts (URL, file path or
#'   JSON text). The first element of the parsed result must carry an
#'   `articles` list whose first entry has `article` and `views` fields.
#' @return A data frame with the columns "book with chapters", "total views",
#'   "number of pages" and "number of views per page", sorted by views per
#'   page in decreasing order. Pages that neither have subpages nor are a
#'   subpage are left out.
getmainnsbook <- function(pvfile) {
  # Namespace-qualified calls: the function no longer attaches plyr/jsonlite
  # to the caller's search path as a side effect.
  top <- jsonlite::fromJSON(pvfile, flatten = TRUE)
  pv <- as.data.frame(top[[1]]$articles[[1]])

  # Drop titles containing ":". A logical mask is used on purpose:
  # pv[-grep(...), ] would drop EVERY row when nothing matches, because a
  # zero-length negative index selects nothing.
  pvp <- pv[!grepl(":", pv$article, fixed = TRUE), , drop = FALSE]

  result_names <- c(
    "book with chapters", "total views", "number of pages",
    "number of views per page"
  )
  if (nrow(pvp) == 0L) {
    # Nothing to summarise: return an empty table with the usual columns.
    empty <- data.frame(
      character(0), integer(0), integer(0), numeric(0),
      stringsAsFactors = FALSE
    )
    names(empty) <- result_names
    return(empty)
  }

  # Book title = text before the first "/". For titles without "/",
  # regexpr() returns -1, so substr() yields "".
  slash_pos <- regexpr("/", pvp$article, fixed = TRUE)
  pvp$bt <- substr(pvp$article, 1, slash_pos - 1)

  # Distinct book titles, taken from the pages that do have a subpage part.
  book_titles <- unique(pvp$bt[slash_pos > 0])

  # A book's own main page has no "/", so assign it to its book explicitly.
  is_book_main <- pvp$article %in% book_titles
  pvp$bt[is_book_main] <- pvp$article[is_book_main]

  # Per book: total views, number of pages, and views per page.
  bcviews <- plyr::ddply(
    pvp, "bt", plyr::summarise,
    cviews = sum(views),
    npages = length(bt),
    nviewsperpage = round(sum(views) / length(bt), digits = 2)
  )

  # Remove the "" group (main-namespace pages without subpages). Again a
  # logical mask, so the table survives when there is no "" group at all.
  bcviews <- bcviews[!(bcviews$bt %in% ""), , drop = FALSE]
  bcviews <- bcviews[
    order(bcviews$nviewsperpage, decreasing = TRUE), ,
    drop = FALSE
  ]

  names(bcviews) <- result_names
  rownames(bcviews) <- NULL
  bcviews
}