JustPaste.it

> Sys.setenv("HADOOP_CMD"="/home/aims1/hadoop/hadoop/bin/hadoop") > Sys.setenv("HADOOP_STREAMING"="/home/aims1/hadoop/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.7.3.jar") > > # load libraries > library("rmr2") > library("rhdfs") > > # initiate rhdfs package > hdfs.init() > > map <- function(k,lines) { + words.list <- strsplit(lines, '\\s') + words <- unlist(words.list) + return( keyval(words, 1) ) + } > > reduce <- function(word, counts) { + keyval(word, sum(counts)) + } > > wordcount <- function (input, output=NULL) { + mapreduce(input=input, output=output, input.format="text", map=map, reduce=reduce) + } > > ## read text files from folder example/wordcount/data > hdfs.root <- 'example/wordcount' > hdfs.data <- file.path(hdfs.root, 'data') > > ## save result in folder example/wordcount/out1 > hdfs.out <- file.path(hdfs.root, 'out1') > > ## Submit job > out <- wordcount(hdfs.data, hdfs.out) packageJobJar: [/tmp/hadoop-unjar2398584813314055559/] [] /tmp/streamjob5398396681685506489.jar tmpDir=null 17/03/03 11:43:07 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032 17/03/03 11:43:07 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032 17/03/03 11:43:08 INFO mapred.FileInputFormat: Total input paths to process : 1 17/03/03 11:43:08 INFO mapreduce.JobSubmitter: number of splits:2 17/03/03 11:43:09 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1488519508031_0002 17/03/03 11:43:09 INFO impl.YarnClientImpl: Submitted application application_1488519508031_0002 17/03/03 11:43:09 INFO mapreduce.Job: The url to track the job: http://aims1:8088/proxy/application_1488519508031_0002/ 17/03/03 11:43:09 INFO mapreduce.Job: Running job: job_1488519508031_0002 17/03/03 11:43:14 INFO mapreduce.Job: Job job_1488519508031_0002 running in uber mode : false 17/03/03 11:43:14 INFO mapreduce.Job: map 0% reduce 0% 17/03/03 11:43:18 INFO mapreduce.Job: Task Id : attempt_1488519508031_0002_m_000001_0, Status : FAILED Error: 
java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1 at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322) at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535) at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61) at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 17/03/03 11:43:18 INFO mapreduce.Job: Task Id : attempt_1488519508031_0002_m_000000_0, Status : FAILED Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1 at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322) at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535) at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61) at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 17/03/03 11:43:23 INFO mapreduce.Job: Task Id : 
attempt_1488519508031_0002_m_000001_1, Status : FAILED Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1 at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322) at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535) at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61) at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) Container killed by the ApplicationMaster. 
17/03/03 11:43:24 INFO mapreduce.Job: Task Id : attempt_1488519508031_0002_m_000000_1, Status : FAILED Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1 at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322) at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535) at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61) at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) Container killed by the ApplicationMaster. 
17/03/03 11:43:28 INFO mapreduce.Job: map 50% reduce 0% 17/03/03 11:43:28 INFO mapreduce.Job: Task Id : attempt_1488519508031_0002_m_000001_2, Status : FAILED Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1 at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322) at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535) at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61) at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) Container killed by the ApplicationMaster. 
17/03/03 11:43:29 INFO mapreduce.Job: map 0% reduce 0% 17/03/03 11:43:29 INFO mapreduce.Job: Task Id : attempt_1488519508031_0002_m_000000_2, Status : FAILED Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1 at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322) at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535) at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61) at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) Container killed by the ApplicationMaster. 17/03/03 11:43:32 INFO mapreduce.Job: map 100% reduce 100% 17/03/03 11:43:33 INFO mapreduce.Job: Job job_1488519508031_0002 failed with state FAILED due to: Task failed task_1488519508031_0002_m_000001 Job failed as tasks failed. 
failedMaps:1 failedReduces:0 17/03/03 11:43:33 INFO mapreduce.Job: Counters: 17 Job Counters Failed map tasks=7 Killed map tasks=1 Killed reduce tasks=1 Launched map tasks=8 Other local map tasks=6 Data-local map tasks=2 Total time spent by all maps in occupied slots (ms)=18687 Total time spent by all reduces in occupied slots (ms)=0 Total time spent by all map tasks (ms)=18687 Total time spent by all reduce tasks (ms)=0 Total vcore-milliseconds taken by all map tasks=18687 Total vcore-milliseconds taken by all reduce tasks=0 Total megabyte-milliseconds taken by all map tasks=19135488 Total megabyte-milliseconds taken by all reduce tasks=0 Map-Reduce Framework CPU time spent (ms)=0 Physical memory (bytes) snapshot=0 Virtual memory (bytes) snapshot=0 17/03/03 11:43:33 ERROR streaming.StreamJob: Job not successful! Streaming Command Failed!

 

Error in mr(map = map, reduce = reduce, combine = combine, vectorized.reduce, : hadoop streaming failed with error code 1

> > ## Fetch results from HDFS > results <- from.dfs(out) Error in to.dfs.path(input) : object 'out' not found > results.df <- as.data.frame(results, stringsAsFactors=F) Error in as.data.frame(results, stringsAsFactors = F) : object 'results' not found > colnames(results.df) <- c('word', 'count') Error in colnames(results.df) <- c("word", "count") : object 'results.df' not found > > head(results.df) Error in head(results.df) : error in evaluating the argument 'x' in selecting a method for function 'head': Error: object 'results.df' not found