వాడుకరి:Arjunaraoc/getmainnsbook.R

వికీసోర్స్ నుండి

## Get main-namespace book pages (titles that have subpages) from a top-1000 pageviews JSON file; last updated 2018-05-03.
## Example argument: url <- "https://wikimedia.org/api/rest_v1/metrics/pageviews/top/te.wikisource.org/all-access/2018/04/all-days"
## Summarise page views per book from a Wikimedia "top pageviews" JSON document.
##
## pvfile: anything jsonlite::fromJSON() accepts (URL, file path or JSON text)
##         whose first element holds an `articles` table with at least the
##         columns `article` (page title) and `views`.
##
## A "book" is a title without ":" that either contains "/" (a subpage, whose
## book title is the text before the first "/") or is itself the root of such
## a subpage. Titles containing ":" are treated as belonging to another
## namespace and are ignored, as are pages that have no subpages in the data.
##
## Returns a data.frame with one row per book, sorted by views per page
## (descending), with columns "book with chapters", "total views",
## "number of pages" and "number of views per page". When no book pages are
## present a zero-row data.frame with the same columns is returned.
getmainnsbook<-function(pvfile){
    library(plyr)
    library(jsonlite)
    outnames<-c("book with chapters","total views","number of pages", "number of views per page")
    emptyresult<-function(){
        res<-data.frame(bt=character(0),cviews=integer(0),npages=integer(0),nviewsperpage=numeric(0),stringsAsFactors=FALSE)
        names(res)<-outnames
        res
    }
    top<-fromJSON(pvfile,flatten=TRUE)
    pv<-as.data.frame(top[[1]]$articles[[1]])
    if(nrow(pv)==0){
        return(emptyresult())
    }
#keep only titles without a namespace prefix; a logical mask is used because
#negative indexing with an empty grep() result would drop every row
    pvp<-pv[!grepl(":",pv$article,fixed=TRUE),,drop=FALSE]
#book title = text before the first '/', "" when the title has no '/'
    slashpos<-regexpr("/",pvp$article,fixed=TRUE)
    pvp$bt<-substr(pvp$article,1,slashpos-1)
#distinct book titles that have at least one subpage in the data
    pvpbu<-unique(pvp$bt[slashpos>0])
#the book's own root page counts towards the book as well
    isroot<-pvp$article %in% pvpbu
    pvp$bt[isroot]<-pvp$article[isroot]
#drop main-namespace pages that do not belong to any book
    books<-pvp[pvp$bt!="",,drop=FALSE]
    if(nrow(books)==0){
        return(emptyresult())
    }
#summarize with total pageviews and number of entries and views/entry;
#plyr::summarise is named explicitly so a masking summarise() cannot be picked up
    bcviews<-plyr::ddply(books,"bt",plyr::summarise, cviews=sum(views),npages=length(views),nviewsperpage=round(sum(views)/length(views),digits=2))
    bcviews<-bcviews[order(bcviews$nviewsperpage,decreasing=TRUE),]
    names(bcviews)<-outnames
    rownames(bcviews)<-NULL
    bcviews
}

ఫలితం ఉదాహరణ(స్వల్ప మార్పుల తరువాత)[మార్చు]