hadoop - Mapreduce Custom TextOutputFormat - Strange characters NUL, SOH, etc -


I have implemented a custom output format that converts key-value pairs into JSON format.

public class jsonoutputformat extends textoutputformat<text, intwritable> { @override public recordwriter<text, intwritable> getrecordwriter(taskattemptcontext context) throws ioexception, interruptedexception {     configuration conf = context.getconfiguration();     path path = getoutputpath(context);     filesystem fs = path.getfilesystem(conf);     fsdataoutputstream out = fs.create(new path(path,context.getjobname()));     return new jsonrecordwriter(out); } 

}

private static class jsonrecordwriter extends linerecordwriter<text,intwritable>{     boolean firstrecord = true;     @override     public synchronized void close(taskattemptcontext context) throws ioexception {         out.writebytes("}");         super.close(context);     }      @override     public synchronized void write(text key, intwritable value)             throws ioexception {         if (!firstrecord){              out.writebytes(",\r\n");             firstrecord = false;         }         out.writeutf(key.tostring() + ":" +value.tostring());     }      public jsonrecordwriter(dataoutputstream out) throws ioexception{         super(out);         out.writebytes("{");     } } 

However, the output of the MapReduce job contains undesirable characters, such as: {NUL chair:12 NUL BS book:1}

My driver class is as follows:

public class driver {  public static class mymapper extends mapper<object, text, text, intwritable> {      intwritable 1 = new intwritable(1);     @override     protected void map(object key, text value, context context) throws ioexception, interruptedexception {         string[] words = value.tostring().split(" ");         for(string word: words)             context.write(new text(word), one);     } } public static class myreducer extends reducer<text, intwritable, text, intwritable> {     @override     protected void reduce(text key, iterable<intwritable> values, context context) throws ioexception, interruptedexception {         iterator = values.iterator();          int count = 0;         while (it.hasnext()){             intwritable c = (intwritable) it.next();             count+=c.get();         }         context.write(key, new intwritable(count));     } }  public static void main(string[] args) throws ioexception, classnotfoundexception, interruptedexception {     configuration configuration = new configuration();     job job = job.getinstance(configuration, "wordcountjson");     job.setjarbyclass(driver.class);      fileinputformat.addinputpath(job, new path(args[0]));     fileoutputformat.setoutputpath(job, new path(args[1]));      job.setmapoutputkeyclass(text.class);     job.setmapoutputvalueclass(intwritable.class);     job.setoutputkeyclass(text.class);     job.setoutputvalueclass(intwritable.class);      job.setmapperclass(mymapper.class);     job.setreducerclass(myreducer.class);     job.setoutputformatclass(jsonoutputformat.class);      job.setnumreducetasks(1);      system.exit(job.waitforcompletion(true)?0:1);    } 

}

Any ideas why these characters are appearing in the output?


Comments

Popular posts from this blog

c# - Binding a comma separated list to a List<int> in asp.net web api -

how to prompt save As Box in Excel Interlop c# MVC 4 -

xslt 1.0 - How to access or retrieve mets content of an item from another item? -