hadoop - Search a given word in a text file using MapReduce in Java on Ubuntu 16.04


I have to make a project that finds a given word (string). The string is input by the user. I have to find the occurrences of the word in a particular text file stored in HDFS. The output should tell whether the word string is present.

package stringsearchjob;

import java.io.IOException;
import java.util.Scanner;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StringSearch {
    public static void main(String argv[]) throws Exception {
        try {
            if (argv.length < 3) {
                System.err.println("Give input/ output/ keyword!");
                return;
            }
            JobConf conf = new JobConf(StringSearch.class);
            Job job = new Job(conf, "StringSearch");

            FileInputFormat.addInputPath(job, new Path(argv[0]));
            FileOutputFormat.setOutputPath(job, new Path(argv[1]));
            conf.set("search", argv[2]);

            job.setJarByClass(StringSearch.class);
            job.setMapperClass(WordMapper.class);
            job.setNumReduceTasks(0);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            JobClient.runJob(conf);

            job.waitForCompletion(true);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            try {
                Configuration conf = context.getConfiguration();
                String search = conf.get("search");
                String line = value.toString();
                Scanner scanner = new Scanner(line);
                while (scanner.hasNext()) {
                    if (line.contains(search)) {
                        String line1 = scanner.next();
                        context.write(new Text(line1), new IntWritable(1));
                    }
                }
                scanner.close();
            }
            catch (IOException e) {
                e.printStackTrace();
            }
            catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}

Is the code wrong? The output on the Ubuntu 16.04 terminal is not correct. The steps I followed are as follows:

  1. After writing the above code, I exported a runnable JAR file named stringsearch.jar. The class name is StringSearch.
  2. Then, on the terminal, I wrote the following commands:

    hadoop fs -mkdir /user
    hadoop fs -mkdir /user/hduser
    hadoop fs -mkdir /user/hduser/stringsearch
    hadoop fs -mkdir stringsearch/input
    hadoop fs -copyFromLocal sample.txt stringsearch/input
    hadoop jar stringsearchnew.jar StringSearch /user/hduser/stringsearch/input user/hduser/stringsearch/output 'lord'
  3. And I am getting errors as follows.

    17/08/20 19:17:35 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    17/08/20 19:17:41 INFO Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
    17/08/20 19:17:41 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
    17/08/20 19:17:41 INFO jvm.JvmMetrics: Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
    Exception in thread "main" org.apache.hadoop.mapred.InvalidJobConfException: Output directory not set in JobConf.
        at org.apache.hadoop.mapred.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:117)
        at org.apache.hadoop.mapreduce.JobSubmitter.checkSpecs(JobSubmitter.java:268)
        at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:139)
        at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
        at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
        at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
        at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:575)
        at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:570)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
        at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:570)
        at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:561)
        at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:870)
        at stringsearchjob.StringSearch.main(StringSearch.java:43)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)

I learned how to use Hadoop MapReduce from the internet only. When I tried to make the program in Java after going through other similar answers, it didn't give the output. I am a complete newbie to Hadoop, so it would be a great benefit if you could please help me resolve the issue. I don't know what's wrong here!


After reading the answer, I edited the code and got the following errors:

    17/08/24 05:01:30 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    Exception in thread "main" java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.eclipse.jdt.internal.jarinjarloader.JarRsrcLoader.main(JarRsrcLoader.java:58)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
    Caused by: java.io.IOException: No FileSystem for scheme: hdfs
        at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2660)
        at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2667)
        at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
        at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
        at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:172)
        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:357)
        at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
        at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(FileInputFormat.java:520)
        at stringsearchjob.StringSearch.main(StringSearch.java:28)
        ... 11 more

Set the input and output directory on the JobConf object, not on the Job object.

You must change it as below:

 FileInputFormat.setInputPaths(conf /* from job conf */, new Path(argv[0]));
 FileOutputFormat.setOutputPath(conf /* from job conf */, new Path(argv[1]));

So the modified code should be as below:

    if (argv.length < 3) {
        System.err.println("Give input/ output/ keyword!");
        return;
    }
    JobConf conf = new JobConf(StringSearch.class);
    Job job = new Job(conf, "StringSearch");

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));
    conf.set("search", argv[2]);

    job.setJarByClass(StringSearch.class);
    job.setMapperClass(WordMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    JobClient.runJob(conf);

    job.waitForCompletion(true);
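Separately, note that the original driver calls conf.set("search", argv[2]) after the Job has already been created; since Job takes its own copy of the Configuration when it is constructed, values set afterwards are generally not visible to the mapper. For reference, here is a minimal sketch (my own reading of the requirement, assuming Hadoop 2.x, not your exact code) that sticks to the new org.apache.hadoop.mapreduce API throughout, so there is no JobConf/JobClient mixing at all:

package stringsearchjob;

import java.io.IOException;
import java.util.Scanner;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StringSearch {

    public static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Read the search word that the driver stored in the job configuration
            String search = context.getConfiguration().get("search");
            Scanner scanner = new Scanner(value.toString());
            while (scanner.hasNext()) {
                String word = scanner.next();
                // Emit only the tokens that contain the search string
                if (word.contains(search)) {
                    context.write(new Text(word), new IntWritable(1));
                }
            }
            scanner.close();
        }
    }

    public static void main(String[] argv) throws Exception {
        if (argv.length < 3) {
            System.err.println("Give input/ output/ keyword!");
            return;
        }
        Configuration conf = new Configuration();
        // Set the search word BEFORE creating the Job, because Job copies the Configuration
        conf.set("search", argv[2]);

        Job job = Job.getInstance(conf, "StringSearch");
        job.setJarByClass(StringSearch.class);
        job.setMapperClass(WordMapper.class);
        job.setNumReduceTasks(0);          // map-only job

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(argv[0]));
        FileOutputFormat.setOutputPath(job, new Path(argv[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}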
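As for the "No FileSystem for scheme: hdfs" error in your edit: the jarinjarloader frames in the trace suggest an Eclipse "Runnable JAR" export, which can repackage the Hadoop dependencies and lose the META-INF/services/org.apache.hadoop.fs.FileSystem registrations. Two commonly suggested workarounds (assuming the hadoop-hdfs jar is on your cluster classpath) are to export a plain JAR containing only your own classes and run it with hadoop jar, or to register the filesystem implementations explicitly in the driver's Configuration before the Job is created, e.g.:

    // Hypothetical workaround, not from the original post: register the FS classes by hand
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());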
