File-Based Data Formats in Hadoop (1): SequenceFile
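
A SequenceFile is Hadoop's flat binary container for serialized key/value pairs; it is splittable for MapReduce processing and supports record- and block-level compression. The example below writes a few Text/IntWritable pairs to a file on HDFS and reads them back, instantiating the key and value classes recorded in the file header.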

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceTest {

	// NOTE: 192.x.x.x is a placeholder; point this at your own NameNode.
	public static final String OUTPUT_PATH = "hdfs://192.x.x.x:9000/a.txt";
	private static final String[] DATA = { "a", "b", "c", };

	@SuppressWarnings("deprecation")
	public static void write(String pathStr) throws IOException {
		Configuration conf = new Configuration();
		Path path = new Path(pathStr);
		FileSystem fs = path.getFileSystem(conf);
		
		// The fs-based createWriter factory is deprecated in Hadoop 2.x but
		// still works; the key and value classes passed here are recorded
		// in the file header.
		SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
				Text.class, IntWritable.class);
		Text key = new Text();
		IntWritable value = new IntWritable();
		for (int i = 0; i < DATA.length; i++) {
			key.set(DATA[i]);
			value.set(i);
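			// getLength() reports the current byte offset in the file,
			// i.e. the position at which this record will be appended.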
			System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
			writer.append(key, value);
		}
		IOUtils.closeStream(writer);
	}

	@SuppressWarnings("deprecation")
	public static void read(String pathStr) throws IOException {
		Configuration conf = new Configuration();
		Path path = new Path(pathStr);
		FileSystem fs = path.getFileSystem(conf);
		// The fs-based Reader constructor is likewise deprecated; reuse the
		// Path already built above instead of constructing it a second time.
		SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
		
		// Instantiate key/value objects of whatever classes the file header
		// declares, so this reader works with any SequenceFile.
		Writable key = (Writable) ReflectionUtils.newInstance(
				reader.getKeyClass(), conf);
		Writable value = (Writable) ReflectionUtils.newInstance(
				reader.getValueClass(), conf);

		while (reader.next(key, value)) {
			System.out.printf("%s\t%s\n", key, value);
		}
		IOUtils.closeStream(reader);
	}

	public static void main(String[] args) throws IOException {
		write(OUTPUT_PATH);
		read(OUTPUT_PATH);
	}
}
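
The fs-based createWriter factory and Reader constructor used above are deprecated as of Hadoop 2.x in favor of option-based variants. Below is a minimal sketch of the same write/read cycle on the newer API, assuming Hadoop 2.x or later; the class name SequenceTest2 and the b.txt path are illustrative placeholders, and error handling is kept minimal.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceTest2 {

	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		// Placeholder address and file name, as in the example above.
		Path path = new Path("hdfs://192.x.x.x:9000/b.txt");

		// Option-based writer factory (Hadoop 2.x+): the key/value classes
		// are passed as options instead of positional arguments.
		SequenceFile.Writer writer = SequenceFile.createWriter(conf,
				SequenceFile.Writer.file(path),
				SequenceFile.Writer.keyClass(Text.class),
				SequenceFile.Writer.valueClass(IntWritable.class));
		try {
			writer.append(new Text("a"), new IntWritable(0));
		} finally {
			IOUtils.closeStream(writer);
		}

		// Option-based reader constructor; key/value types still come
		// from the file header.
		SequenceFile.Reader reader = new SequenceFile.Reader(conf,
				SequenceFile.Reader.file(path));
		try {
			Text key = new Text();
			IntWritable value = new IntWritable();
			while (reader.next(key, value)) {
				System.out.printf("%s\t%s\n", key, value);
			}
		} finally {
			IOUtils.closeStream(reader);
		}
	}
}

Further Writer options, such as compression(...), can be appended to the same createWriter call, and the reflective read() shown earlier works unchanged against files written this way.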