hadoop - search a given word in a text File using MapReduce in JAVA in ubuntu 16.04 -
i have make project find given word (string). string inputted user. find occurrence of word in particular text file stored in hdfs. output should tell presence of word string.
package stringsearchjob; import java.io.ioexception; import java.util.scanner; import org.apache.hadoop.conf.configuration; import org.apache.hadoop.fs.path; import org.apache.hadoop.io.intwritable; import org.apache.hadoop.io.longwritable; import org.apache.hadoop.io.text; import org.apache.hadoop.mapred.jobclient; import org.apache.hadoop.mapred.jobconf; import org.apache.hadoop.mapreduce.job; import org.apache.hadoop.mapreduce.mapper; import org.apache.hadoop.mapreduce.lib.input.fileinputformat; import org.apache.hadoop.mapreduce.lib.output.fileoutputformat; public class stringsearch{ public static void main(string argv[]) throws exception { try { if (argv.length<3) { system.err.println("give input/ output/ keyword!"); return; } jobconf conf = new jobconf(stringsearch.class); job job = new job(conf,"stringsearch"); fileinputformat.addinputpath(job, new path(argv[0])); fileoutputformat.setoutputpath(job, new path(argv[1])); conf.set("search", argv[2]); job.setjarbyclass(stringsearch.class); job.setmapperclass(wordmapper.class); job.setnumreducetasks(0); job.setmapoutputkeyclass(text.class); job.setmapoutputvalueclass(intwritable.class); job.setoutputkeyclass(text.class); job.setoutputvalueclass(intwritable.class); jobclient.runjob(conf); job.waitforcompletion(true); } catch (exception e) { e.printstacktrace(); } } public static class wordmapper extends mapper<longwritable, text, text, intwritable>{ @override public void map(longwritable key, text value, context context) throws ioexception, interruptedexception { try { configuration conf = context.getconfiguration(); string search = conf.get("search"); string line = value.tostring(); scanner scanner = new scanner(line); while (scanner.hasnext()) { if (line.contains(search)) { string line1 = scanner.next(); context.write(new text(line1), new intwritable(1)); } } scanner.close(); } catch (ioexception e){ e.printstacktrace(); } catch (interruptedexception e){ e.printstacktrace(); } } } }
is code wrong? because output on ubuntu-16.04 terminal not correct. steps followed follows:
- after wring above code, exported runnable jar file named stringsearch.jar. class name stringsearch.
now, on terminal wrote following commands:
hadoop fs -mkdir /user hadoop fs -mkdir /user/hduser hadoop fs -mkdir /user/hduser/stringsearch hadoop fs -mkdir stringsearch/input hadoop -fs -copyfromlocal sample.txt stringsearch/input hadoop jar stringsearchnew.jar stringsearch /user/hduser/stringsearch/input user/hduser/stringsearch/output 'lord'
and getting errors follows.
17/08/20 19:17:35 warn util.nativecodeloader: unable load native-hadoop library platform... using builtin-java classes applicable 17/08/20 19:17:41 info configuration.deprecation: session.id deprecated. instead, use dfs.metrics.session-id 17/08/20 19:17:41 info jvm.jvmmetrics: initializing jvm metrics processname=jobtracker, sessionid= 17/08/20 19:17:41 info jvm.jvmmetrics: cannot initialize jvm metrics processname=jobtracker, sessionid= - initialized exception in thread "main" org.apache.hadoop.mapred.invalidjobconfexception: output directory not set in jobconf. @ org.apache.hadoop.mapred.fileoutputformat.checkoutputspecs(fileoutputformat.java:117) @ org.apache.hadoop.mapreduce.jobsubmitter.checkspecs(jobsubmitter.java:268) @ org.apache.hadoop.mapreduce.jobsubmitter.submitjobinternal(jobsubmitter.java:139) @ org.apache.hadoop.mapreduce.job$10.run(job.java:1290) @ org.apache.hadoop.mapreduce.job$10.run(job.java:1287) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:422) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1698) @ org.apache.hadoop.mapreduce.job.submit(job.java:1287) @ org.apache.hadoop.mapred.jobclient$1.run(jobclient.java:575) @ org.apache.hadoop.mapred.jobclient$1.run(jobclient.java:570) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:422) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1698) @ org.apache.hadoop.mapred.jobclient.submitjobinternal(jobclient.java:570) @ org.apache.hadoop.mapred.jobclient.submitjob(jobclient.java:561) @ org.apache.hadoop.mapred.jobclient.runjob(jobclient.java:870) @ stringsearchjob.stringsearch.main(stringsearch.java:43) @ sun.reflect.nativemethodaccessorimpl.invoke0(native method) @ sun.reflect.nativemethodaccessorimpl.invoke(nativemethodaccessorimpl.java:62) @ sun.reflect.delegatingmethodaccessorimpl.invoke(delegatingmethodaccessorimpl.java:43) @ java.lang.reflect.method.invoke(method.java:498) @ org.apache.hadoop.util.runjar.run(runjar.java:221) @ org.apache.hadoop.util.runjar.main(runjar.java:136)
i learned how use hadoop mapreduce internet only. when tried make program in java after going through other similar answers, didn't gave output. complete newbie hadoop , benefit if please me resort issue. don't what's wrong in here!
after reading answer, edited code , got following errors:
17/08/24 05:01:30 warn util.nativecodeloader: unable load native-hadoop library platform... using builtin-java classes applicable exception in thread "main" java.lang.reflect.invocationtargetexception @ sun.reflect.nativemethodaccessorimpl.invoke0(native method) @ sun.reflect.nativemethodaccessorimpl.invoke(nativemethodaccessorimpl.java:62) @ sun.reflect.delegatingmethodaccessorimpl.invoke(delegatingmethodaccessorimpl.java:43) @ java.lang.reflect.method.invoke(method.java:498) @ org.eclipse.jdt.internal.jarinjarloader.jarrsrcloader.main(jarrsrcloader.java:58) @ sun.reflect.nativemethodaccessorimpl.invoke0(native method) @ sun.reflect.nativemethodaccessorimpl.invoke(nativemethodaccessorimpl.java:62) @ sun.reflect.delegatingmethodaccessorimpl.invoke(delegatingmethodaccessorimpl.java:43) @ java.lang.reflect.method.invoke(method.java:498) @ org.apache.hadoop.util.runjar.run(runjar.java:221) @ org.apache.hadoop.util.runjar.main(runjar.java:136) caused by: java.io.ioexception: no filesystem scheme: hdfs @ org.apache.hadoop.fs.filesystem.getfilesystemclass(filesystem.java:2660) @ org.apache.hadoop.fs.filesystem.createfilesystem(filesystem.java:2667) @ org.apache.hadoop.fs.filesystem.access$200(filesystem.java:94) @ org.apache.hadoop.fs.filesystem$cache.getinternal(filesystem.java:2703) @ org.apache.hadoop.fs.filesystem$cache.get(filesystem.java:2685) @ org.apache.hadoop.fs.filesystem.get(filesystem.java:373) @ org.apache.hadoop.fs.filesystem.get(filesystem.java:172) @ org.apache.hadoop.fs.filesystem.get(filesystem.java:357) @ org.apache.hadoop.fs.path.getfilesystem(path.java:295) @ org.apache.hadoop.mapreduce.lib.input.fileinputformat.addinputpath(fileinputformat.java:520) @ stringsearchjob.stringsearch.main(stringsearch.java:28) ... 11 more
set input , output directory jobconf object not job object
you must change below :
fileinputformat.setinputpaths(conf /*from job conf*/, new path(args[0])); fileoutputformat.setoutputpath(conf /*from job conf*/, new path(args[1]));
so modified code should below:
if (argv.length<3) { system.err.println("give input/ output/ keyword!"); return; } jobconf conf = new jobconf(stringsearch.class); job job = new job(conf,"stringsearch"); fileinputformat.setinputpaths(conf, new path(args[0])); fileoutputformat.setoutputpath(conf, new path(args[1])); conf.set("search", argv[2]); job.setjarbyclass(stringsearch.class); job.setmapperclass(wordmapper.class); job.setnumreducetasks(0); job.setmapoutputkeyclass(text.class); job.setmapoutputvalueclass(intwritable.class); job.setoutputkeyclass(text.class); job.setoutputvalueclass(intwritable.class); jobclient.runjob(conf); job.waitforcompletion(true);
Comments
Post a Comment