Wikipedia(R)

x
 
# Load the tab-delimited Wikipedia dump into a DataFrame named `wiki`,
# using the spark-csv data source. `header = "true"` is passed through to
# that source (presumably: treat the first line as column names).
wiki <- read.df(
  sqlContext,
  path = "/mnt/hossein/wiki",
  source = "com.databricks.spark.csv",
  delimiter = "\t",
  header = "true"
)

xxxxxxxxxx
 
# Auto-print the DataFrame; the output lists each column name with its type.
wiki
DataFrame[id:string, title:string, modified:string, text:string, user:string]

xxxxxxxxxx
 
# Preview the first rows of `wiki` to sanity-check the parsed columns.
head(wiki)
      id                                  title                modified
1  28436                                  Saint 2014-04-28 23:41:11.000
2 244338                                 T-1000 2014-04-27 00:55:34.000
3 251796 Battles of the Spanish\023American War 2014-03-23 12:58:29.000
4 259648      Ellicottville (village), New York 2014-03-24 21:34:42.000
5 671276                         Richard Prince 2014-04-25 03:15:59.000
6 680938                       Bengal Bush Lark 2013-12-22 21:00:08.000
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                          ... dropped 157000 characters ...                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Template:TaxoboxThe Bengal Bush Lark  or  Rufous-winged Bush Lark  (Mirafra assamica) is a small passerine bird.== Description ==It is short-tailed and has a strong stout bill. In size it is not as long as the Skylark, measuring about 15 centimeters. (See below for more.)== Range and population ==It is a resident breeder in the Indian subcontinent and southeast Asia, with an estimated global Extent of Occurrence of 100,000-1,000,000 square km. == Habits and habitat == The Bengal Bush Lark is a common bird of dry, open, stony country often with sparse shrubbery, and cultivated areas. It nests on the ground, laying three or four speckled eggs. This lark feeds primarily on seeds and insects, especially the latter during the breeding season.== Taxonomic changes == The differences within the genus Mirafra are often very subtle and confusing with many differences apparent only when specimens are examined in hand.The Bengal Bush Lark was earlier classified into several races, the Bengal race assamica and the Madras race affinis. 
These were subsequently split, on the basis of diagnostic song and display characters, into the Jerdon's Bush Lark (Mirafra affinis) and assamica in the strict sense. Mirafra (assamica) assamica is dark-streaked grey above, and buff below, with spotting on the breast and behind the eye. The wings are rufous. Jerdon's Bushlark has paler, greyish-brown underparts. The song of Jerdon's Bush Lark is a dry rattle given from its perch, while that of M. (a.) assamica is a repetition of thin disyllabic notes, delivered in a song-flight. Template:cite journal==References==Template:reflist== External links ==*Template:cite webImage:Bengal bushlark (Mirafra assamica) in Kolkata W IMG 4592.jpg| In Kolkata, West Bengal, India.Image:Bengal bushlark (Mirafra assamica) with Paddyfield Pipit (Anthuis rufulus) in Kolkata W IMG 4571.jpg|With Paddyfield Pipit (Anthus rufulus) in Kolkata, West Bengal, India.Image:Bengal bushlark (Mirafra assamica) in Kolkata W IMG 4599.jpg| In Kolkata, West Bengal, India.Image:Bengal bushlark (Mirafra assamica) in Kolkata W IMG 4596.jpg| In Kolkata, West Bengal, India.Image:Bengal Bushllark- Kolkata- preening I2 IMG 5000.jpg| Preening in  Kolkata, West Bengal, India.Image:Bengal Bushlark I IMG 2079.jpg| In  Kolkata, West Bengal, India.Image:Bengal Bushlark I IMG 1964.jpg| In  Kolkata, West Bengal, India.
           user
1 Brianschwartz
2      KConWiki
3     Guriezous
4  JMyrleFuller
5    Meatsgains
6  Snowmanradio

xxxxxxxxxx
 
# Derive a calendar-day column from the `modified` timestamp string so that
# edits can later be grouped per day.
wiki$Date <- to_date(wiki[["modified"]])

xxxxxxxxxx
 
# Cache the DataFrame (which now carries the Date column) so that later
# actions on `wiki` can reuse it.
# NOTE(review): the storage level used by cache() depends on the Spark
# version — confirm it fits the cluster's memory.
cache(wiki)
DataFrame[id:string, title:string, modified:string, text:string, user:string, Date:date]

xxxxxxxxxx
 
# Report the size of the data set as (number of rows, number of columns).
# Counting rows triggers a Spark job over the full DataFrame.
c(nrow(wiki), ncol(wiki))
[1] 4072157       6

xxxxxxxxxx
 
# magrittr supplies the %>% pipe used by the aggregation further down.
# Install it only when it is missing, so re-running this cell does not
# re-download and rebuild the package from source every time.
if (!requireNamespace("magrittr", quietly = TRUE)) {
  install.packages("magrittr")
}
library(magrittr)
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'http://cran.us.r-project.org/src/contrib/magrittr_1.5.tar.gz'
Content type 'application/x-gzip' length 200504 bytes (195 KB)
==================================================
downloaded 195 KB

* installing *source* package ‘magrittr’ ...
** package ‘magrittr’ successfully unpacked and MD5 sums checked
** R
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** installing vignettes
** testing if installed package can be loaded
* DONE (magrittr)
The downloaded source packages are in
	‘/tmp/RtmppgaXuJ/downloaded_packages’

xxxxxxxxxx
 
# Count the rows of `wiki` per Date value on the cluster, then bring the
# (Date, count) result back to the driver as a local R data.frame.
modifications <- wiki %>%
  group_by("Date") %>%
  count() %>%
  collect()

xxxxxxxxxx
 
# Load ggplot2 for the plot drawn in the next cell.
library(ggplot2)
# Set the repr.plot.height option to 500.
# NOTE(review): presumably this controls the rendered plot height in the
# notebook; the unit is not visible here — confirm.
options(repr.plot.height = 500)

xxxxxxxxxx
 
# Line chart of the per-day counts in `modifications`: Date on the x axis,
# count on the y axis, drawn with the black-and-white theme.
ggplot(data = modifications, mapping = aes(x = Date, y = count)) +
  geom_line() +
  theme_bw()

xxxxxxxxxx