• 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏吧

Java MROutput类的典型用法和代码示例

java 1次浏览

本文整理汇总了Java中org.apache.tez.mapreduce.output.MROutput的典型用法代码示例。如果您正苦于以下问题:Java MROutput类的具体用法?Java MROutput怎么用?Java MROutput使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。

MROutput类属于org.apache.tez.mapreduce.output包,在下文中一共展示了MROutput类的22个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: createDag

点赞 3

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the single-vertex "TopK DataGen" DAG.
 *
 * <p>The DAG holds one {@code datagen} vertex that runs {@code GenDataProcessor} with
 * {@code numTasks} tasks and writes through an {@code MROutput} data sink configured with
 * {@code TextOutputFormat} and the URI of {@code outPath}.
 *
 * @param tezConf base configuration; a copy is handed to the data sink builder
 * @param outPath destination path of the generated data
 * @param outSize total output size, split across the tasks with integer division
 * @param extraColumns forwarded unchanged to {@code GenDataProcessor.createConfiguration}
 * @param numTasks parallelism of the vertex; a value of 0 makes the division throw
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably raised while building the
 *         processor payload (TODO confirm against {@code GenDataProcessor})
 */
private DAG createDag(TezConfiguration tezConf, Path outPath, long outSize, int extraColumns, int numTasks)
        throws IOException {

    final long sizePerTask = outSize / numTasks;

    DAG dataGenDag = DAG.create("TopK DataGen");

    // Describe the generator processor and attach its serialized settings.
    ProcessorDescriptor generator = ProcessorDescriptor.create(GenDataProcessor.class.getName());
    generator.setUserPayload(
            UserPayload.create(
                    ByteBuffer.wrap(GenDataProcessor.createConfiguration(sizePerTask, extraColumns))));

    Vertex genDataVertex = Vertex.create("datagen", generator, numTasks);
    genDataVertex.addDataSink(
            OUTPUT,
            MROutput.createConfigBuilder(
                    new Configuration(tezConf), TextOutputFormat.class, outPath.toUri().toString())
                    .build());

    dataGenDag.addVertex(genDataVertex);
    return dataGenDag;
}
 

开发者ID:sequenceiq,
项目名称:sequenceiq-samples,
代码行数:19,
代码来源:TopKDataGen.java

示例2: initializeOutputs

点赞 3

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Starts every logical output and then hands the outputs to the plan's {@code TezOutput}
 * operators.
 *
 * <p>Outputs are started in the iteration order of the map. Those that are {@code MROutput}
 * instances are additionally remembered in {@code fileOutputs}.
 *
 * @param outputs logical outputs keyed by name (the log message calls the key a vertex)
 * @throws Exception if starting an output or attaching the outputs fails
 */
private void initializeOutputs(Map<String, LogicalOutput> outputs) throws Exception {
    for (Entry<String, LogicalOutput> namedOutput : outputs.entrySet()) {
        LogicalOutput current = namedOutput.getValue();
        LOG.info("Starting output " + current + " to vertex " + namedOutput.getKey());
        current.start();
        if (!(current instanceof MROutput)) {
            continue;
        }
        fileOutputs.add((MROutput) current);
    }

    // Every TezOutput operator in the execution plan gets access to the full output map.
    LinkedList<TezOutput> planOutputs = PlanHelper.getPhysicalOperators(execPlan, TezOutput.class);
    for (TezOutput planOutput : planOutputs) {
        planOutput.attachOutputs(outputs, conf);
    }
}
 

开发者ID:sigmoidanalytics,
项目名称:spork,
代码行数:17,
代码来源:PigProcessor.java

示例3: attachOutputs

点赞 3

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Binds this operator to the {@code MROutput} registered under {@code outputKey} and caches
 * the writer obtained from it.
 *
 * @param outputs logical outputs keyed by name
 * @param conf configuration supplied by the caller; not read by this method
 * @throws ExecException if no {@code MROutput} is registered under {@code outputKey}, or if
 *         obtaining the writer raises an {@code IOException}
 */
@Override
public void attachOutputs(Map<String, LogicalOutput> outputs,
        Configuration conf)
        throws ExecException {
    LogicalOutput candidate = outputs.get(outputKey);
    // instanceof is false for null, so a missing entry is rejected by the same test.
    if (!(candidate instanceof MROutput)) {
        throw new ExecException("POStoreTez only accepts MROutput. key =  "
                + getOperatorKey() + ", outputs = " + outputs);
    }
    output = (MROutput) candidate;
    try {
        writer = output.getWriter();
    } catch (IOException ioFailure) {
        throw new ExecException(ioFailure);
    }
}
 

开发者ID:sigmoidanalytics,
项目名称:spork,
代码行数:17,
代码来源:POStoreTez.java

示例4: run

点赞 3

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Reads grouped (key, values) records from the single input, adds up the {@code IntWritable}
 * values of each key and writes one (key, sum) record per key to the {@code MROutput}.
 *
 * @throws Exception if the input count is not exactly one, or reading/writing fails
 */
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 1);

  MROutput sink = (MROutput) getOutputs().values().iterator().next();
  KeyValueWriter sinkWriter = sink.getWriter();
  KeyValuesReader groupedReader =
      (KeyValuesReader) getInputs().values().iterator().next().getReader();

  while (groupedReader.next()) {
    Text word = (Text) groupedReader.getCurrentKey();
    int total = 0;
    for (Object count : groupedReader.getCurrentValues()) {
      total += ((IntWritable) count).get();
    }
    sinkWriter.write(word, new IntWritable(total));
  }
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:17,
代码来源:WordCount.java

示例5: createDataSink

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Registers an {@code MROutput}-backed data sink on the given vertex.
 *
 * @param vertex vertex that receives the sink
 * @param name name under which the sink is added to the vertex
 * @param keyClass key type passed to {@code buildJobConf}
 * @param valueClass value type passed to {@code buildJobConf}
 * @param outputFormatClass output format handed to the {@code MROutput} config builder
 * @param outputPath output location handed to the {@code MROutput} config builder
 */
private void createDataSink(Vertex vertex, String name, Class<? extends Writable> keyClass,
		Class<? extends Writable> valueClass, Class<?> outputFormatClass, String outputPath) {
	JobConf sinkConf = this.buildJobConf(keyClass, valueClass);
	vertex.addDataSink(name,
			MROutput.createConfigBuilder(sinkConf, outputFormatClass, outputPath).build());
}
 

开发者ID:hortonworks,
项目名称:dstream,
代码行数:10,
代码来源:TezDAGBuilder.java

示例6: abortOutput

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Aborts every output collected in {@code fileOutputs}.
 *
 * <p>An {@code IOException} from one output is logged and not rethrown, so the remaining
 * outputs are still aborted.
 */
private void abortOutput() {
    for (MROutput pending : fileOutputs) {
        try {
            pending.abort();
        } catch (IOException abortFailure) {
            LOG.error("Encountered exception while aborting output", abortFailure);
        }
    }
}
 

开发者ID:sigmoidanalytics,
项目名称:spork,
代码行数:10,
代码来源:PigProcessor.java

示例7: run

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Writes every key of the "partitioner1" input that also occurs in the "partitioner2" input.
 *
 * <p>The keys of "partitioner2" are first loaded into an in-memory set; the "partitioner1"
 * input is then streamed and each key found in the set is written with a
 * {@code NullWritable} value. When processing is done the method polls
 * {@code canCommit()} every 100 ms and commits the output once it returns true.
 *
 * @throws Exception if the input/output layout is not as expected, or on I/O failure
 */
@Override
public void run() throws Exception {
  Preconditions.checkState(getInputs().size() == 2);
  Preconditions.checkState(getOutputs().size() == 1);

  LogicalInput probeInput = getInputs().get("partitioner1");
  LogicalInput buildInput = getInputs().get("partitioner2");
  Reader probeReader = probeInput.getReader();
  Reader buildReader = buildInput.getReader();
  Preconditions.checkState(probeReader instanceof KeyValueReader);
  Preconditions.checkState(buildReader instanceof KeyValueReader);

  LogicalOutput onlyOutput = getOutputs().values().iterator().next();
  Preconditions.checkState(onlyOutput instanceof MROutput);
  MROutput sink = (MROutput) onlyOutput;
  KeyValueWriter sinkWriter = sink.getWriter();

  // Each key is copied into a new Text before being stored in the set.
  Set<Text> knownKeys = new HashSet<Text>();
  KeyValueReader buildKvReader = (KeyValueReader) buildReader;
  while (buildKvReader.next()) {
    knownKeys.add(new Text((Text) buildKvReader.getCurrentKey()));
  }

  KeyValueReader probeKvReader = (KeyValueReader) probeReader;
  while (probeKvReader.next()) {
    Text candidate = (Text) probeKvReader.getCurrentKey();
    if (!knownKeys.contains(candidate)) {
      continue;
    }
    sinkWriter.write(candidate, NullWritable.get());
  }

  LOG.info("Completed Processing. Trying to commit");
  while (!getContext().canCommit()) {
    Thread.sleep(100L);
  }
  sink.commit();
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:36,
代码来源:IntersectExample.java

示例8: createDAG

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the two-vertex "WordCount" DAG: a "tokenizer" vertex reading text through
 * {@code MRInput}, connected by an ordered, hash-partitioned edge to a single-task "summer"
 * vertex writing text through {@code MROutput}.
 *
 * @param fs not used by this method
 * @param tezConf base configuration; copies are made for the input and the output
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath value stored under {@code FileInputFormat.INPUT_DIR}
 * @param outputPath value stored under {@code FileOutputFormat.OUTDIR}
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably from payload creation (TODO confirm)
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {

  // Input side: text files under inputPath.
  Configuration sourceConf = new Configuration(tezConf);
  sourceConf.set(FileInputFormat.INPUT_DIR, inputPath);
  byte[] sourcePayload = MRInput.createUserPayload(
      sourceConf, TextInputFormat.class.getName(), true, true);
  InputDescriptor sourceDescriptor =
      new InputDescriptor(MRInput.class.getName()).setUserPayload(sourcePayload);

  // Output side: text files under outputPath.
  Configuration sinkConf = new Configuration(tezConf);
  sinkConf.set(FileOutputFormat.OUTDIR, outputPath);
  byte[] sinkPayload = MROutput.createUserPayload(
      sinkConf, TextOutputFormat.class.getName(), true);
  OutputDescriptor sinkDescriptor =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(sinkPayload);

  // Parallelism -1: presumably resolved later from the input splits (TODO confirm).
  ProcessorDescriptor tokenizer = new ProcessorDescriptor(TokenProcessor.class.getName());
  Vertex tokenizerVertex =
      new Vertex("tokenizer", tokenizer, -1, MRHelpers.getMapResource(tezConf));
  tokenizerVertex.addInput("MRInput", sourceDescriptor, MRInputAMSplitGenerator.class);

  ProcessorDescriptor summer = new ProcessorDescriptor(SumProcessor.class.getName());
  Vertex summerVertex =
      new Vertex("summer", summer, 1, MRHelpers.getReduceResource(tezConf));
  summerVertex.addOutput("MROutput", sinkDescriptor, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer shuffleConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null)
      .build();

  DAG wordCountDag = new DAG("WordCount");
  wordCountDag.addVertex(tokenizerVertex);
  wordCountDag.addVertex(summerVertex);
  wordCountDag.addEdge(
      new Edge(tokenizerVertex, summerVertex, shuffleConf.createDefaultEdgeProperty()));
  return wordCountDag;
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:37,
代码来源:WordCount.java

示例9: createDag

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the single-vertex "IntersectDataGen" DAG whose "datagen" vertex writes three
 * {@code MROutput} outputs: the stream data, the hash data and the expected result.
 *
 * @param tezConf base configuration used for the output payloads and the vertex resource
 * @param largeOutPath output path of the stream (large) data set
 * @param smallOutPath output path of the hash (small) data set
 * @param expectedOutputPath output path of the expected-result data set
 * @param numTasks parallelism of the vertex; a value of 0 makes the divisions throw
 * @param largeOutSize total size of the large data set, divided per task
 * @param smallOutSize total size of the small data set, divided per task
 * @return the assembled DAG
 * @throws IOException if creating an output payload fails
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {

  final long streamSizePerTask = largeOutSize / numTasks;
  final long hashSizePerTask = smallOutSize / numTasks;

  DAG dataGenDag = new DAG("IntersectDataGen");

  byte[] streamPayload = createPayloadForOutput(largeOutPath, tezConf);
  byte[] hashPayload = createPayloadForOutput(smallOutPath, tezConf);
  byte[] expectedPayload = createPayloadForOutput(expectedOutputPath, tezConf);

  ProcessorDescriptor generator = new ProcessorDescriptor(GenDataProcessor.class.getName())
      .setUserPayload(GenDataProcessor.createConfiguration(streamSizePerTask, hashSizePerTask));
  Vertex genDataVertex =
      new Vertex("datagen", generator, numTasks, MRHelpers.getMapResource(tezConf));

  // One MROutput per data set, each committed through MROutputCommitter.
  OutputDescriptor streamSink =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(streamPayload);
  genDataVertex.addOutput(STREAM_OUTPUT_NAME, streamSink, MROutputCommitter.class);

  OutputDescriptor hashSink =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(hashPayload);
  genDataVertex.addOutput(HASH_OUTPUT_NAME, hashSink, MROutputCommitter.class);

  OutputDescriptor expectedSink =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(expectedPayload);
  genDataVertex.addOutput(EXPECTED_OUTPUT_NAME, expectedSink, MROutputCommitter.class);

  dataGenDag.addVertex(genDataVertex);
  return dataGenDag;
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:31,
代码来源:IntersectDataGen.java

示例10: createDag

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the single-vertex "JoinDataGen" DAG whose "datagen" vertex has three text data
 * sinks: the stream data, the hash data and the expected result.
 *
 * @param tezConf base configuration; a fresh copy is handed to each sink builder
 * @param largeOutPath output path of the stream (large) data set
 * @param smallOutPath output path of the hash (small) data set
 * @param expectedOutputPath output path of the expected-result data set
 * @param numTasks parallelism of the vertex; a value of 0 makes the divisions throw
 * @param largeOutSize total size of the large data set, divided per task
 * @param smallOutSize total size of the small data set, divided per task
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably from payload creation (TODO confirm)
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {

  final long streamSizePerTask = largeOutSize / numTasks;
  final long hashSizePerTask = smallOutSize / numTasks;

  DAG dataGenDag = DAG.create("JoinDataGen");

  ProcessorDescriptor generator = ProcessorDescriptor.create(GenDataProcessor.class.getName());
  generator.setUserPayload(
      UserPayload.create(
          ByteBuffer.wrap(
              GenDataProcessor.createConfiguration(streamSizePerTask, hashSizePerTask))));
  Vertex genDataVertex = Vertex.create("datagen", generator, numTasks);

  // Each sink gets its own Configuration copy and its own destination URI.
  genDataVertex.addDataSink(
      STREAM_OUTPUT_NAME,
      MROutput.createConfigBuilder(
          new Configuration(tezConf), TextOutputFormat.class, largeOutPath.toUri().toString())
          .build());
  genDataVertex.addDataSink(
      HASH_OUTPUT_NAME,
      MROutput.createConfigBuilder(
          new Configuration(tezConf), TextOutputFormat.class, smallOutPath.toUri().toString())
          .build());
  genDataVertex.addDataSink(
      EXPECTED_OUTPUT_NAME,
      MROutput.createConfigBuilder(
          new Configuration(tezConf), TextOutputFormat.class,
          expectedOutputPath.toUri().toString())
          .build());

  dataGenDag.addVertex(genDataVertex);
  return dataGenDag;
}
 

开发者ID:apache,
项目名称:tez,
代码行数:28,
代码来源:JoinDataGen.java

示例11: run

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Copies every key/value record from the single {@code UnorderedKVInput} to the single
 * {@code MROutput}.
 *
 * <p>All inputs and outputs are started before their concrete types are checked.
 *
 * @throws IllegalStateException if there is not exactly one input and one output, or if
 *         they are not an {@code UnorderedKVInput} and an {@code MROutput} respectively
 * @throws Exception if starting, reading or writing fails
 */
@Override
public void run() throws Exception {
  
  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
  }

  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : outputs.values()) {
    output.start();
  }

  LogicalInput li = inputs.values().iterator().next();
  if (! (li instanceof UnorderedKVInput)) {
    // The message names the type that is actually checked (it used to name the
    // ShuffledUnorderedKVInput class, which this method does not reference).
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with UnorderedKVInput");
  }

  LogicalOutput lo = outputs.values().iterator().next();
  if (! (lo instanceof MROutput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
  }

  UnorderedKVInput kvInput = (UnorderedKVInput) li;
  MROutput mrOutput = (MROutput) lo;

  // Pass every record through unchanged.
  KeyValueReader kvReader = kvInput.getReader();
  KeyValueWriter kvWriter = mrOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object value = kvReader.getCurrentValue();

    kvWriter.write(key, value);
  }
}
 

开发者ID:apache,
项目名称:tez,
代码行数:41,
代码来源:FilterByWordOutputProcessor.java

示例12: createDAG

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the three-vertex "topk" DAG: tokenizer, sum and writer.
 *
 * <p>The tokenizer reads text through {@code MRInput}; tokenizer and sum are linked by an
 * ordered, hash-partitioned edge; sum and writer are linked by a broadcast edge; the writer
 * stores text through {@code MROutput}.
 *
 * @param tezConf base configuration; copies are handed to the source and sink builders
 * @param inputPath location of the input text
 * @param outputPath location of the output text
 * @param columnIndex decimal integer, passed to the tokenizer's payload
 * @param top decimal integer K, passed to the sum and writer payloads
 * @param numPartitions decimal integer, parallelism of the sum vertex
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably from payload creation (TODO confirm)
 * @throws NumberFormatException if one of the numeric strings cannot be parsed
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
        String columnIndex, String top, String numPartitions) throws IOException {

    DataSourceDescriptor source = MRInput.createConfigBuilder(new Configuration(tezConf),
            TextInputFormat.class, inputPath).build();

    DataSinkDescriptor sink = MROutput.createConfigBuilder(new Configuration(tezConf),
            TextOutputFormat.class, outputPath).build();

    ProcessorDescriptor tokenizer = ProcessorDescriptor.create(TokenProcessor.class.getName())
            .setUserPayload(createPayload(Integer.valueOf(columnIndex)));
    Vertex tokenizerVertex = Vertex.create(TOKENIZER, tokenizer).addDataSource(INPUT, source);

    int topK = Integer.parseInt(top);
    ProcessorDescriptor summer = ProcessorDescriptor.create(SumProcessor.class.getName())
            .setUserPayload(createPayload(topK));
    Vertex sumVertex = Vertex.create(SUM, summer, Integer.parseInt(numPartitions));

    // The writer runs as exactly one task: it has to see the whole data set, because
    // several writer tasks would each emit the top K of their own partition only.
    ProcessorDescriptor writer = ProcessorDescriptor.create(Writer.class.getName())
            .setUserPayload(createPayload(topK));
    Vertex writerVertex = Vertex.create(WRITER, writer, 1).addDataSink(OUTPUT, sink);

    OrderedPartitionedKVEdgeConfig tokenToSum = OrderedPartitionedKVEdgeConfig
            .newBuilder(Text.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName())
            .build();

    UnorderedKVEdgeConfig sumToWriter = UnorderedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), Text.class.getName())
            .build();

    DAG topKDag = DAG.create("topk");
    return topKDag
            .addVertex(tokenizerVertex)
            .addVertex(sumVertex)
            .addVertex(writerVertex)
            .addEdge(Edge.create(tokenizerVertex, sumVertex, tokenToSum.createDefaultEdgeProperty()))
            .addEdge(Edge.create(sumVertex, writerVertex,
                    sumToWriter.createDefaultBroadcastEdgeProperty()));
}
 

开发者ID:sequenceiq,
项目名称:sequenceiq-samples,
代码行数:44,
代码来源:TopK.java

示例13: run

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Copies every key/value record from the single {@code ShuffledUnorderedKVInput} to the
 * single {@code MROutput}, then commits the output if {@code canCommit()} allows it and
 * aborts it otherwise.
 *
 * <p>All inputs and outputs are started before their concrete types are checked.
 *
 * @param inputs logical inputs keyed by name; must contain exactly one entry
 * @param outputs logical outputs keyed by name; must contain exactly one entry
 * @throws IllegalStateException if the input/output count or types are not as expected
 * @throws Exception if starting, reading, writing, committing or aborting fails
 */
@Override
public void run(Map<String, LogicalInput> inputs,
    Map<String, LogicalOutput> outputs) throws Exception {

  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
  }
  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
  }

  for (LogicalInput each : inputs.values()) {
    each.start();
  }
  for (LogicalOutput each : outputs.values()) {
    each.start();
  }

  LogicalInput onlyInput = inputs.values().iterator().next();
  if (!(onlyInput instanceof ShuffledUnorderedKVInput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with ShuffledUnorderedKVInput");
  }

  LogicalOutput onlyOutput = outputs.values().iterator().next();
  if (!(onlyOutput instanceof MROutput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
  }

  ShuffledUnorderedKVInput source = (ShuffledUnorderedKVInput) onlyInput;
  MROutput sink = (MROutput) onlyOutput;

  // Pass every record through unchanged.
  KeyValueReader sourceReader = source.getReader();
  KeyValueWriter sinkWriter = sink.getWriter();
  while (sourceReader.next()) {
    Object key = sourceReader.getCurrentKey();
    Object value = sourceReader.getCurrentValue();
    sinkWriter.write(key, value);
  }

  if (!processorContext.canCommit()) {
    sink.abort();
  } else {
    sink.commit();
  }
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:47,
代码来源:FilterByWordOutputProcessor.java

示例14: createDag

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the "IntersectExample" DAG: two source vertices ("partitioner1" for the stream
 * file, "partitioner2" for the hash file) feed an "intersect" vertex over unordered,
 * hash-partitioned edges; the intersect vertex writes text through {@code MROutput}.
 *
 * @param tezConf base configuration; copies are made for each input and for the output
 * @param streamPath path read by "partitioner1"
 * @param hashPath path read by "partitioner2"
 * @param outPath path written by the "intersect" vertex
 * @param numPartitions parallelism of the "intersect" vertex
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably from payload creation (TODO confirm)
 */
private DAG createDag(TezConfiguration tezConf, Path streamPath, Path hashPath, Path outPath,
    int numPartitions) throws IOException {
  DAG intersectDag = new DAG("IntersectExample");

  // Input payload for the stream side.
  Configuration streamConf = new Configuration(tezConf);
  streamConf.set(FileInputFormat.INPUT_DIR, streamPath.toUri().toString());
  byte[] streamPayload = MRInput.createUserPayload(
      streamConf, TextInputFormat.class.getName(), true, false);

  // Input payload for the hash side.
  Configuration hashConf = new Configuration(tezConf);
  hashConf.set(FileInputFormat.INPUT_DIR, hashPath.toUri().toString());
  byte[] hashPayload = MRInput.createUserPayload(
      hashConf, TextInputFormat.class.getName(), true, false);

  // Intermediate-data configuration, shared by both source vertices. Ideally this would
  // set only selected keys from the underlying conf; revisit once there is a better
  // mechanism to configure the IOs.
  UnorderedPartitionedKVEdgeConfigurer shuffleConf = UnorderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), NullWritable.class.getName(),
          HashPartitioner.class.getName(), null)
      .build();

  Configuration sinkConf = new Configuration(tezConf);
  sinkConf.set(FileOutputFormat.OUTDIR, outPath.toUri().toString());
  byte[] sinkPayload = MROutput.createUserPayload(
      sinkConf, TextOutputFormat.class.getName(), true);

  // TODO: change the way resources are set up so that MRHelpers is not needed.
  Vertex streamFileVertex = new Vertex("partitioner1",
      new ProcessorDescriptor(ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf));
  streamFileVertex.addInput("streamfile",
      new InputDescriptor(MRInput.class.getName()).setUserPayload(streamPayload),
      MRInputAMSplitGenerator.class);

  Vertex hashFileVertex = new Vertex("partitioner2",
      new ProcessorDescriptor(ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf));
  hashFileVertex.addInput("hashfile",
      new InputDescriptor(MRInput.class.getName()).setUserPayload(hashPayload),
      MRInputAMSplitGenerator.class);

  Vertex intersectVertex = new Vertex("intersect",
      new ProcessorDescriptor(IntersectProcessor.class.getName()), numPartitions,
      MRHelpers.getReduceResource(tezConf));
  intersectVertex.addOutput("finalOutput",
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(sinkPayload),
      MROutputCommitter.class);

  Edge streamEdge =
      new Edge(streamFileVertex, intersectVertex, shuffleConf.createDefaultEdgeProperty());
  Edge hashEdge =
      new Edge(hashFileVertex, intersectVertex, shuffleConf.createDefaultEdgeProperty());

  intersectDag.addVertex(streamFileVertex);
  intersectDag.addVertex(hashFileVertex);
  intersectDag.addVertex(intersectVertex);
  intersectDag.addEdge(streamEdge);
  intersectDag.addEdge(hashEdge);
  return intersectDag;
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:58,
代码来源:IntersectExample.java

示例15: createPayloadForOutput

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Creates the {@code MROutput} user payload for a text output stored at the given path.
 *
 * @param outputPath path whose URI string is stored under {@code FileOutputFormat.OUTDIR}
 * @param srcConf configuration to copy; the copy, not the original, is modified
 * @return the payload produced by {@code MROutput.createUserPayload}
 * @throws IOException declared by the signature; presumably from payload creation (TODO confirm)
 */
private byte[] createPayloadForOutput(Path outputPath, Configuration srcConf) throws IOException {
  Configuration sinkConf = new Configuration(srcConf);
  sinkConf.set(FileOutputFormat.OUTDIR, outputPath.toUri().toString());
  return MROutput.createUserPayload(sinkConf, TextOutputFormat.class.getName(), true);
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:7,
代码来源:IntersectDataGen.java

示例16: createDAG

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the "UnionExample" DAG.
 *
 * <p>Three map vertices ("map1", "map2", "map3") run {@code TokenProcessor} over the same
 * input descriptor. "map1" and "map2" are combined into a vertex group named "union", which
 * has its own "parts" output and feeds the single-task "checker" vertex through a group
 * input edge; "map3" feeds "checker" through a regular edge. The "checker" vertex has two
 * outputs, "union" and "all-parts".
 *
 * @param fs not used by this method
 * @param tezConf base configuration; copies are made for the input and each output
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath value stored under {@code FileInputFormat.INPUT_DIR}
 * @param outputPath base output location; "-all-parts" and "-parts" are appended for the
 *        two additional outputs
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably from payload creation (TODO confirm)
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG dag = new DAG("UnionExample");
  
  // NOTE(review): -1 presumably leaves the task count to be decided from the input
  // splits - confirm against the Vertex constructor documentation.
  int numMaps = -1;
  Configuration inputConf = new Configuration(tezConf);
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  // A single input descriptor instance is shared by all three map vertices below.
  InputDescriptor id = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(MRInput.createUserPayload(inputConf,
          TextInputFormat.class.getName(), true, true));

  Vertex mapVertex1 = new Vertex("map1", new ProcessorDescriptor(
      TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex1.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex2 = new Vertex("map2", new ProcessorDescriptor(
      TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex2.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex3 = new Vertex("map3", new ProcessorDescriptor(
      TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex3.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  // Consumer vertex with a fixed parallelism of 1.
  Vertex checkerVertex = new Vertex("checker",
      new ProcessorDescriptor(
          UnionProcessor.class.getName()),
              1, MRHelpers.getReduceResource(tezConf));

  // Output "union" of the checker vertex, written to outputPath.
  Configuration outputConf = new Configuration(tezConf);
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
    .setUserPayload(MROutput.createUserPayload(
        outputConf, TextOutputFormat.class.getName(), true));
  checkerVertex.addOutput("union", od, MROutputCommitter.class);

  // Output "all-parts" of the checker vertex, written to outputPath + "-all-parts".
  Configuration allPartsConf = new Configuration(tezConf);
  allPartsConf.set(FileOutputFormat.OUTDIR, outputPath+"-all-parts");
  OutputDescriptor od2 = new OutputDescriptor(MROutput.class.getName())
    .setUserPayload(MROutput.createUserPayload(
        allPartsConf, TextOutputFormat.class.getName(), true));
  checkerVertex.addOutput("all-parts", od2, MROutputCommitter.class);

  // Output "parts" belongs to the vertex group, written to outputPath + "-parts".
  Configuration partsConf = new Configuration(tezConf);
  partsConf.set(FileOutputFormat.OUTDIR, outputPath+"-parts");
  
  // The group is created here, before its member vertices are added to the DAG below.
  VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
  OutputDescriptor od1 = new OutputDescriptor(MROutput.class.getName())
    .setUserPayload(MROutput.createUserPayload(
        partsConf, TextOutputFormat.class.getName(), true));
  unionVertex.addOutput("parts", od1, MROutputCommitter.class);

  // The same edge configuration is used for the regular edge and the group input edge.
  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  dag.addVertex(mapVertex1)
      .addVertex(mapVertex2)
      .addVertex(mapVertex3)
      .addVertex(checkerVertex)
      .addEdge(
          new Edge(mapVertex3, checkerVertex, edgeConf.createDefaultEdgeProperty()))
      .addEdge(
          new GroupInputEdge(unionVertex, checkerVertex, edgeConf.createDefaultEdgeProperty(),
              new InputDescriptor(
                  ConcatenatedMergedKeyValuesInput.class.getName())));
  return dag;  
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:72,
代码来源:UnionExample.java

示例17: createDAG

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the "CartesianProduct" DAG.
 *
 * <p>Three tokenizer vertices each read one input path with split grouping turned off.
 * The first two are connected to the join vertex through custom edges managed by
 * {@code CartesianProductEdgeManager}; the third is connected through a broadcast edge.
 * The join vertex is driven by {@code CartesianProductVertexManager} and writes text
 * through {@code MROutput}.
 *
 * @param tezConf base configuration; copies are handed to every source and sink builder
 * @param inputPath1 input of the first vertex
 * @param inputPath2 input of the second vertex
 * @param inputPath3 input of the third (broadcast) vertex
 * @param outputPath output of the join vertex
 * @param isPartitioned when true, the cartesian product config is built from a
 *        vertex-to-partition-count map and the custom edges use a partitioned edge config;
 *        otherwise it is built from the source list and the edges are unpartitioned
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably from payload creation (TODO confirm)
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath1, String inputPath2,
                      String inputPath3, String outputPath, boolean isPartitioned)
  throws IOException {
  // Split grouping is turned off so that each input file incurs one task.
  Vertex firstSource =
    Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  firstSource.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1)
      .groupSplits(false)
      .build());

  Vertex secondSource =
    Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  secondSource.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2)
      .groupSplits(false)
      .build());

  Vertex broadcastSource =
    Vertex.create(VERTEX3, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  broadcastSource.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3)
      .groupSplits(false)
      .build());

  CartesianProductConfig productConfig;
  if (!isPartitioned) {
    productConfig = new CartesianProductConfig(Arrays.asList(cpSources));
  } else {
    // Every cartesian-product source gets the same partition count.
    Map<String, Integer> partitionsBySource = new HashMap<>();
    for (String source : cpSources) {
      partitionsBySource.put(source, numPartition);
    }
    productConfig = new CartesianProductConfig(partitionsBySource);
  }
  UserPayload productPayload = productConfig.toUserPayload(tezConf);

  Vertex joinVertex =
    Vertex.create(VERTEX4, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  joinVertex.addDataSink(OUTPUT,
    MROutput.createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
      .build());
  joinVertex.setVertexManagerPlugin(
    VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
      .setUserPayload(productPayload));

  // The edge manager receives the same payload as the vertex manager.
  EdgeManagerPluginDescriptor productEdgeManager =
    EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  productEdgeManager.setUserPayload(productPayload);

  EdgeProperty productEdge;
  if (!isPartitioned) {
    UnorderedKVEdgeConfig unpartitioned =
      UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
    productEdge = unpartitioned.createDefaultCustomEdgeProperty(productEdgeManager);
  } else {
    UnorderedPartitionedKVEdgeConfig partitioned =
      UnorderedPartitionedKVEdgeConfig.newBuilder(Text.class.getName(),
        IntWritable.class.getName(), CustomPartitioner.class.getName()).build();
    productEdge = partitioned.createDefaultCustomEdgeProperty(productEdgeManager);
  }

  UnorderedKVEdgeConfig broadcastConf =
    UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
  EdgeProperty broadcastEdge = broadcastConf.createDefaultBroadcastEdgeProperty();

  return DAG.create("CartesianProduct")
    .addVertex(firstSource)
    .addVertex(secondSource)
    .addVertex(broadcastSource)
    .addVertex(joinVertex)
    .addEdge(Edge.create(firstSource, joinVertex, productEdge))
    .addEdge(Edge.create(secondSource, joinVertex, productEdge))
    .addEdge(Edge.create(broadcastSource, joinVertex, broadcastEdge));
}
 

开发者ID:apache,
项目名称:tez,
代码行数:62,
代码来源:CartesianProduct.java

示例18: createDAG

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the two-vertex "WordCount" DAG: a tokenizer vertex reading text through
 * {@code MRInput}, connected by an ordered, hash-partitioned edge to a summation vertex
 * writing text through {@code MROutput}.
 *
 * @param tezConf base configuration; copies are handed to the source and sink builders and
 *        the original is used to override edge options
 * @param inputPath location of the input text
 * @param outputPath location of the output text
 * @param numPartitions parallelism of the summation vertex
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably from descriptor creation (TODO confirm)
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    int numPartitions) throws IOException {

  // Input descriptor: MRInput reads the text at inputPath using TextInputFormat. Split
  // grouping and AM-side split generation follow the two settings queried here.
  DataSourceDescriptor source = MRInput
      .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
      .groupSplits(!isDisableSplitGrouping())
      .generateSplitsInAM(!isGenerateSplitInClient())
      .build();

  // Output descriptor: MROutput writes text to outputPath using TextOutputFormat.
  DataSinkDescriptor sink = MROutput
      .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
      .build();

  // Tokenizer vertex: reads the data source and tokenizes it with TokenProcessor. No task
  // count is given; it is decided from the information in the data source descriptor.
  ProcessorDescriptor tokenizer = ProcessorDescriptor.create(TokenProcessor.class.getName());
  Vertex tokenizerVertex = Vertex.create(TOKENIZER, tokenizer).addDataSource(INPUT, source);

  // Edge between the tokenizer (producer) and the summation (consumer). To sum in
  // parallel, tokens are partitioned by word so that a given word always lands in the same
  // partition, and the counts are grouped per word. OrderedPartitionedKVEdgeConfig is a
  // helper that sets up a matching input/output pair providing that partitioning and
  // grouping; the same edge could be configured piece by piece without it. Key type is
  // Text (the word), value type is IntWritable (the count), partitioning is hash based.
  // setFromConfiguration is optional and lets command line parameters override options.
  OrderedPartitionedKVEdgeConfig shuffleConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  // Summation vertex: consumes the tokenized data and computes the sums with
  // SumProcessor. Its task count equals the number of partitions, made configurable via
  // the numPartitions parameter.
  ProcessorDescriptor summer = ProcessorDescriptor.create(SumProcessor.class.getName());
  Vertex summationVertex = Vertex.create(SUMMATION, summer, numPartitions)
      .addDataSink(OUTPUT, sink);

  // The jar holding this class is assumed to ship with the Tez jars, so it is not added
  // as a local file to the vertices; otherwise that would be required.

  // Assemble the DAG: both vertices plus the producer-to-consumer edge.
  DAG wordCountDag = DAG.create("WordCount");
  wordCountDag.addVertex(tokenizerVertex);
  wordCountDag.addVertex(summationVertex);
  wordCountDag.addEdge(
      Edge.create(tokenizerVertex, summationVertex, shuffleConf.createDefaultEdgeProperty()));
  return wordCountDag;
}
 

开发者ID:apache,
项目名称:tez,
代码行数:58,
代码来源:WordCount.java

示例19: createDAG

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Assembles the ordered word count DAG: a tokenizer vertex feeds a summation vertex, which in
 * turn feeds a single-task sorter vertex that writes to the data sink.
 *
 * @param tezConf configuration copied for the input/output builders and used to override the
 *     edge settings
 * @param inputPath path handed to the TextInputFormat based data source
 * @param outputPath path handed to the TextOutputFormat based data sink
 * @param numPartitions number of tasks of the summation vertex
 * @param disableSplitGrouping when true, split grouping is switched off on the input
 * @param isGenerateSplitInClient when true, split generation in the AM is switched off
 * @param dagName name given to the created DAG
 * @return the DAG holding the three vertices and the two edges connecting them
 * @throws IOException declared by the signature; presumably raised while building the
 *     input/output descriptors (confirm against the builders)
 */
public static DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    int numPartitions, boolean disableSplitGrouping, boolean isGenerateSplitInClient,
    String dagName) throws IOException {

  // Both builder switches are simply the inverse of the corresponding "disable" style flags.
  boolean groupSplits = !disableSplitGrouping;
  boolean generateSplitsInAM = !isGenerateSplitInClient;

  DataSourceDescriptor textSource = MRInput
      .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
      .groupSplits(groupSplits)
      .generateSplitsInAM(generateSplitsInAM)
      .build();

  DataSinkDescriptor textSink = MROutput
      .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
      .build();

  // The tokenizer is the only vertex that reads from the external data source.
  Vertex tokenizer = Vertex
      .create(TOKENIZER, ProcessorDescriptor.create(TokenProcessor.class.getName()))
      .addDataSource(INPUT, textSource);

  String textType = Text.class.getName();
  String intType = IntWritable.class.getName();
  String partitionerType = HashPartitioner.class.getName();

  // Text key / IntWritable value: counts for the same word land in the same partition.
  // setFromConfiguration is optional; it lets command line parameters override the options.
  OrderedPartitionedKVEdgeConfig wordKeyedEdge = OrderedPartitionedKVEdgeConfig
      .newBuilder(textType, intType, partitionerType)
      .setFromConfiguration(tezConf)
      .build();

  // Intermediate vertex: consumes one edge and produces the other, no external source or sink.
  Vertex summation = Vertex.create(
      SUMMATION, ProcessorDescriptor.create(SumProcessor.class.getName()), numPartitions);

  // IntWritable key / Text value: words with the same count land in the same partition, ordered
  // by count. setFromConfiguration again allows command line overrides.
  OrderedPartitionedKVEdgeConfig countKeyedEdge = OrderedPartitionedKVEdgeConfig
      .newBuilder(intType, textType, partitionerType)
      .setFromConfiguration(tezConf)
      .build();

  // A single sorter task gathers everything in one place, which yields a global sort order and
  // allows the NoOpSorter to emit the already ordered data.
  Vertex sorter = Vertex
      .create(SORTER, ProcessorDescriptor.create(NoOpSorter.class.getName()), 1)
      .addDataSink(OUTPUT, textSink);

  // No need to add the jar containing this class; it is assumed to be part of the tez jars.

  DAG dag = DAG.create(dagName);
  dag.addVertex(tokenizer);
  dag.addVertex(summation);
  dag.addVertex(sorter);
  dag.addEdge(Edge.create(tokenizer, summation, wordKeyedEdge.createDefaultEdgeProperty()));
  dag.addEdge(Edge.create(summation, sorter, countKeyedEdge.createDefaultEdgeProperty()));
  return dag;
}
 

开发者ID:apache,
项目名称:tez,
代码行数:58,
代码来源:OrderedWordCount.java

示例20: createDAG

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds the "UnionExample" DAG: three tokenizing map vertices and one checker vertex. The first
 * two map vertices are combined into a vertex group whose merged output feeds the checker; the
 * third map vertex is connected to the checker through an ordinary edge.
 *
 * @param fs not used by this method
 * @param tezConf base configuration copied for every input/output descriptor
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath input directory set on the MRInput configuration
 * @param outputPath output directory of the "union" sink; the "-all-parts" and "-parts" suffixes
 *     are appended to it for the two other sinks
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably raised while building the
 *     input/output descriptors (confirm against the builders)
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG unionDag = DAG.create("UnionExample");

  // Input is described through old-API ("mapred") configuration keys rather than builder args.
  Configuration mrInputConf = new Configuration(tezConf);
  mrInputConf.setBoolean("mapred.mapper.new-api", false);
  mrInputConf.set("mapred.input.format.class", TextInputFormat.class.getName());
  mrInputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  MRInput.MRInputConfigBuilder inputBuilder = MRInput.createConfigBuilder(mrInputConf, null);
  DataSourceDescriptor sharedSource = inputBuilder.generateSplitsInAM(false).build();

  // NOTE(review): -1 presumably leaves the task count to be decided at runtime - confirm.
  int mapParallelism = -1;
  String tokenProcessorName = TokenProcessor.class.getName();

  // All three map vertices read the same data source under the input name "MRInput".
  Vertex firstMap = Vertex
      .create("map1", ProcessorDescriptor.create(tokenProcessorName), mapParallelism)
      .addDataSource("MRInput", sharedSource);
  Vertex secondMap = Vertex
      .create("map2", ProcessorDescriptor.create(tokenProcessorName), mapParallelism)
      .addDataSource("MRInput", sharedSource);
  Vertex thirdMap = Vertex
      .create("map3", ProcessorDescriptor.create(tokenProcessorName), mapParallelism)
      .addDataSource("MRInput", sharedSource);

  Vertex checker = Vertex.create(
      "checker", ProcessorDescriptor.create(UnionProcessor.class.getName()), 1);

  // Sink "union": also configured through old-API keys, written to outputPath itself.
  Configuration unionSinkConf = new Configuration(tezConf);
  unionSinkConf.setBoolean("mapred.reducer.new-api", false);
  unionSinkConf.set("mapred.output.format.class", TextOutputFormat.class.getName());
  unionSinkConf.set(FileOutputFormat.OUTDIR, outputPath);
  DataSinkDescriptor unionSink = MROutput.createConfigBuilder(unionSinkConf, null).build();
  checker.addDataSink("union", unionSink);

  // Sink "all-parts": second sink on the checker vertex.
  Configuration allPartsSinkConf = new Configuration(tezConf);
  DataSinkDescriptor allPartsSink = MROutput
      .createConfigBuilder(allPartsSinkConf, TextOutputFormat.class, outputPath + "-all-parts")
      .build();
  checker.addDataSink("all-parts", allPartsSink);

  // Sink "parts": attached to the vertex group made of map1 and map2, not to a single vertex.
  Configuration partsSinkConf = new Configuration(tezConf);
  DataSinkDescriptor partsSink = MROutput
      .createConfigBuilder(partsSinkConf, TextOutputFormat.class, outputPath + "-parts")
      .build();
  VertexGroup mapGroup = unionDag.createVertexGroup("union", firstMap, secondMap);
  mapGroup.addDataSink("parts", partsSink);

  OrderedPartitionedKVEdgeConfig kvEdgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(
          Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
      .build();

  unionDag.addVertex(firstMap);
  unionDag.addVertex(secondMap);
  unionDag.addVertex(thirdMap);
  unionDag.addVertex(checker);

  // map3 -> checker is a plain edge.
  unionDag.addEdge(Edge.create(thirdMap, checker, kvEdgeConf.createDefaultEdgeProperty()));

  // {map1, map2} -> checker is a group edge whose merged input is a
  // ConcatenatedMergedKeyValuesInput.
  InputDescriptor mergedInput =
      InputDescriptor.create(ConcatenatedMergedKeyValuesInput.class.getName());
  unionDag.addEdge(GroupInputEdge.create(
      mapGroup, checker, kvEdgeConf.createDefaultEdgeProperty(), mergedInput));
  return unionDag;
}
 

开发者ID:apache,
项目名称:tez,
代码行数:61,
代码来源:UnionExample.java

示例21: runWordCount

点赞 2

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Builds a two-vertex word count DAG (tokenizer -> summation), submits it through a Tez client,
 * waits for it to complete and returns the string form of the DAG ID.
 *
 * <p>The Tez client obtained for the run is always stopped, including when submission or the
 * wait for completion fails.
 *
 * @param tokenizerProcessor class name of the processor run by the tokenizer vertex
 * @param summationProcessor class name of the processor run by the summation vertex
 * @param dagName name given to the created DAG
 * @param withTimeline forwarded to {@code getTezClient}
 * @return string form of the TezDAGID built from the client's application ID and DAG index 1
 * @throws Exception if building, submitting or waiting for the DAG fails, or if stopping the
 *     client fails
 */
private String runWordCount(String tokenizerProcessor, String summationProcessor,
    String dagName, boolean withTimeline)
    throws Exception {
  //HDFS path
  Path outputLoc = new Path("/tmp/outPath_" + System.currentTimeMillis());

  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(conf,
      TextInputFormat.class, inputLoc.toString()).build();

  DataSinkDescriptor dataSink =
      MROutput.createConfigBuilder(conf, TextOutputFormat.class, outputLoc.toString()).build();

  Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
      tokenizerProcessor)).addDataSource(INPUT, dataSource);

  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName()).build();

  Vertex summationVertex = Vertex.create(SUMMATION,
      ProcessorDescriptor.create(summationProcessor), 1).addDataSink(OUTPUT, dataSink);

  // Create DAG and add the vertices. Connect the producer and consumer vertices via the edge
  DAG dag = DAG.create(dagName);
  dag.addVertex(tokenizerVertex).addVertex(summationVertex).addEdge(
      Edge.create(tokenizerVertex, summationVertex, edgeConf.createDefaultEdgeProperty()));

  TezClient tezClient = getTezClient(withTimeline);
  try {
    // Update Caller Context
    CallerContext callerContext =
        CallerContext.create("TezExamples", "Tez WordCount Example Job");
    ApplicationId appId = tezClient.getAppMasterApplicationId();
    if (appId == null) {
      // Fall back to a placeholder ID so the caller context can still be populated.
      appId = ApplicationId.newInstance(1001L, 1);
    }
    callerContext.setCallerIdAndType(appId.toString(), "TezApplication");
    dag.setCallerContext(callerContext);

    DAGClient client = tezClient.submitDAG(dag);
    client.waitForCompletionWithStatusUpdates(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    // The application ID is deliberately re-read from the client here instead of reusing appId,
    // which may hold the placeholder created above.
    TezDAGID tezDAGID = TezDAGID.getInstance(tezClient.getAppMasterApplicationId(), 1);
    return tezDAGID.toString();
  } finally {
    // Stop the client on every path so a failed submission or wait does not leak it.
    tezClient.stop();
  }
}
 

开发者ID:apache,
项目名称:tez,
代码行数:48,
代码来源:TestHistoryParser.java

示例22: addMROutput

点赞 1

import org.apache.tez.mapreduce.output.MROutput; //导入依赖的package包/类
/**
 * Convenience method that attaches an {@link MROutput} to the given vertex. The output is
 * registered under the name "MROutput" with {@code MROutputCommitter} as its committer.
 *
 * <p>This should only be called for one vertex in a DAG.
 *
 * @param vertex the vertex the output is added to
 * @param userPayload payload set on the output descriptor
 */
public static void addMROutput(Vertex vertex, byte[] userPayload) {
  String outputClassName = MROutput.class.getName();
  OutputDescriptor outputDescriptor =
      new OutputDescriptor(outputClassName).setUserPayload(userPayload);
  vertex.addOutput("MROutput", outputDescriptor, MROutputCommitter.class);
}
 

开发者ID:apache,
项目名称:incubator-tez,
代码行数:15,
代码来源:MRHelpers.java


版权声明:本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系管理员进行删除。
喜欢 (0)