## Wiki page: వాడుకరి:Arjunaraoc/getmainnsbook.R
## (wiki page interface text carried over from the export: "స్వరూపం")
## Get main-namespace book pages (pages that have subpages) from a top-1000 page views JSON file. Last updated 2018-05-03.
## Example argument: url <- "https://wikimedia.org/api/rest_v1/metrics/pageviews/top/te.wikisource.org/all-access/2018/04/all-days"
## Summarise page views per book from a Wikimedia "top pageviews" response.
##
## A "book" is a main-namespace page that has at least one subpage
## ("Title/Chapter") in the list. Views of the book's own page (when it is
## listed) and of all its listed subpages are pooled under the book title.
## Titles containing ":" are treated as non-main-namespace pages and ignored.
##
## Args:
##   pvfile: JSON text, file path or URL accepted by jsonlite::fromJSON().
##           The first top-level element must hold an `articles` list whose
##           first entry has `article` and `views` fields.
##
## Returns:
##   A data frame with one row per book, sorted by views per page
##   (descending), with columns "book with chapters", "total views",
##   "number of pages" and "number of views per page" (rounded to 2 digits).
##   It has zero rows when the list contains no book pages.
getmainnsbook <- function(pvfile) {
  # Use the namespace instead of library(): attaching packages from inside a
  # function silently changes the caller's search path.
  top <- jsonlite::fromJSON(pvfile, flatten = TRUE)
  pv <- as.data.frame(top[[1]]$articles[[1]], stringsAsFactors = FALSE)
  if (!all(c("article", "views") %in% names(pv))) {
    stop("page view data must contain 'article' and 'views' fields",
         call. = FALSE)
  }

  article <- as.character(pv$article)
  # Logical mask rather than negative indexing: x[-integer(0), ] would drop
  # every row (instead of none) when no title contains ":".
  in_main <- !grepl(":", article, fixed = TRUE)
  article <- article[in_main]
  views <- pv$views[in_main]

  # Book title = text before the first "/". regexpr() yields -1 when there is
  # no "/", which makes substr() return "" for pages without subpage paths.
  slash_at <- regexpr("/", article, fixed = TRUE)
  book <- substr(article, 1L, slash_at - 1L)
  book_titles <- unique(book[slash_at > 0])

  # A page whose full title equals a known book title is that book's own
  # main page, so it is counted under the book as well.
  is_book_root <- article %in% book_titles
  book[is_book_root] <- article[is_book_root]

  # Keep only pages assigned to a book. Filtering with a mask is safe even
  # when there are no standalone pages to discard.
  keep <- !is.na(book) & nzchar(book)
  views_by_book <- split(views[keep], book[keep])
  total_views <- unname(
    vapply(views_by_book, function(v) sum(as.numeric(v)), numeric(1))
  )
  n_pages <- unname(lengths(views_by_book))

  bcviews <- data.frame(
    bt = as.character(names(views_by_book)),
    cviews = total_views,
    npages = n_pages,
    nviewsperpage = round(total_views / n_pages, digits = 2),
    stringsAsFactors = FALSE
  )
  by_rate <- order(bcviews$nviewsperpage, decreasing = TRUE)
  bcviews <- bcviews[by_rate, , drop = FALSE]
  names(bcviews) <- c("book with chapters", "total views", "number of pages", "number of views per page")
  rownames(bcviews) <- NULL
  bcviews
}