Typical usage and code examples of the Java POLoad class

This article collects typical usage examples of the Java class org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad. If you are wondering what POLoad is for, how it is used, or where to find concrete examples, the curated class code examples below should help.

The POLoad class belongs to the org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators package. Below are 35 code examples of the POLoad class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
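
For orientation, here is a minimal sketch of how a POLoad is typically constructed and added to a physical plan, distilled from the examples below. The scope string, operator id, and file path are placeholders, not values from any real project:

import org.apache.pig.FuncSpec;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;

static POLoad newSimpleLoad(PhysicalPlan plan) throws Exception {
    POLoad load = new POLoad(new OperatorKey("scope", 1L));      // placeholder key
    load.setLFile(new FileSpec("/tmp/input.txt",                 // placeholder path
            new FuncSpec(PigStorage.class.getName() + "()")));   // load via PigStorage
    load.setPc(new PigContext());                                // execution context
    load.setResultType(DataType.BAG);                            // a load produces a bag of tuples
    plan.add(load);                                              // POLoad acts as a plan root
    return load;
}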

Example 1: updateUDFs

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public void updateUDFs(PhysicalPlan plan) {		
	try {
		for (POStore store : PlanHelper.getPhysicalOperators(plan, POStore.class)) {
			if (store.getStoreFunc() instanceof ISignStore) {
				ISignStore sf = (ISignStore) store.getStoreFunc();
				splan.UDFs.addAll(sf.getUDFs());
			}
		}
	
		for (POLoad load : PlanHelper.getPhysicalOperators(plan, POLoad.class)) {
			if (load.getLoadFunc() instanceof SpoutWrapper) {
				SpoutWrapper lf = (SpoutWrapper) load.getLoadFunc();
				// Add the spout's UDF so it gets picked up by the Jar.
				splan.UDFs.add(lf.getSpoutClass());
			}
		}
	} catch (VisitorException e) {
		throw new RuntimeException(e);
	}
}
 

Developer: JamesLampton
Project: piggybank-squeal
Lines of code: 21
Source file: MRtoSConverter.java

Example 2: getInputSizeFromLoader

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * Get the total input size in bytes by looking at statistics provided by
 * loaders that implement {@link LoadMetadata}.
 * @param ld
 * @param job
 * @return total input size in bytes, or -1 if unknown or incomplete
 * @throws IOException on error
 */
static long getInputSizeFromLoader(POLoad ld, Job job) throws IOException {
    if (ld.getLoadFunc() == null
            || !(ld.getLoadFunc() instanceof LoadMetadata)
            || ld.getLFile() == null
            || ld.getLFile().getFileName() == null) {
        return -1;
    }

    ResourceStatistics statistics;
    try {
        statistics = ((LoadMetadata) ld.getLoadFunc())
                    .getStatistics(ld.getLFile().getFileName(), job);
    } catch (Exception e) {
        log.warn("Couldn't get statistics from LoadFunc: " + ld.getLoadFunc(), e);
        return -1;
    }

    if (statistics == null || statistics.getSizeInBytes() == null) {
        return -1;
    }

    return statistics.getSizeInBytes();
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 32
Source file: InputSizeReducerEstimator.java

Example 3: connRedOper

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * Connect the reduce MROpers to the leaf node in the map MROper mro
 * by adding appropriate loads
 * @param mergedPlans - The list of reduce MROpers
 * @param mro - The map MROper
 * @throws PlanException 
 * @throws IOException
 */
private void connRedOper(List<MapReduceOper> mergedPlans, MapReduceOper mro) throws PlanException, IOException{
    PhysicalOperator leaf = null;
    List<PhysicalOperator> leaves = mro.mapPlan.getLeaves();
    if(leaves!=null && leaves.size()>0)
        leaf = leaves.get(0);

    for (MapReduceOper mmro : mergedPlans) {
        mmro.setReduceDone(true);
        FileSpec fileSpec = getTempFileSpec();
        POLoad ld = getLoad();
        ld.setLFile(fileSpec);
        POStore str = getStore();
        str.setSFile(fileSpec);
        mmro.reducePlan.addAsLeaf(str);
        mro.mapPlan.add(ld);
        if(leaf!=null)
            mro.mapPlan.connect(ld, leaf);
        MRPlan.connect(mmro, mro);
    }
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 29
Source file: MRCompiler.java

Example 4: ExampleGenerator

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public ExampleGenerator(LogicalPlan plan, PigContext hadoopPigContext) {
        this.plan = plan;
//        pigContext = new PigContext(ExecType.LOCAL, hadoopPigContext
//                .getProperties());
        pigContext = hadoopPigContext;
        // pigContext.setExecType(ExecType.LOCAL);
        FileLocalizer.setInitialized(false);
        try {
            pigContext.connect();
        } catch (ExecException e) {
            log.error("Error connecting to the cluster "
                    + e.getLocalizedMessage());

        }
        execEngine = new HExecutionEngine(pigContext);
        localMRRunner = new LocalMapReduceSimulator();
        poLoadToSchemaMap = new HashMap<POLoad, LogicalSchema>();
    }
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 19
Source file: ExampleGenerator.java

Example 5: readBaseData

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
private void readBaseData(List<Operator> loads) throws IOException, InterruptedException, FrontendException, ExecException {
    PhysicalPlan thisPhyPlan = new PhysicalPlan();
    for (Operator op : loads) {
        LogicalSchema schema = ((LOLoad) op).getSchema();
        if(schema == null) {
            throw new ExecException("Example Generator requires a schema. Please provide a schema while loading data.");
        }
        poLoadToSchemaMap.put((POLoad)logToPhyMap.get(op), schema);
        thisPhyPlan.add(logToPhyMap.get(op));
    }
    baseData = null;
    Map<Operator, DataBag> result = getData(thisPhyPlan);
    baseData = new HashMap<LOLoad, DataBag>();
    for (Operator lo : result.keySet()) {
        if (lo instanceof LOLoad) {
            baseData.put((LOLoad) lo, result.get(lo));
        }
    }
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 20
Source file: ExampleGenerator.java

Example 6: visitLoad

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Override
public void visitLoad(POLoad ld) throws VisitorException{
    // LOADs from temporary files need no illustrator
    if (revisit)
        return;

    LinkedList<IdentityHashSet<Tuple>> eqClasses = new LinkedList<IdentityHashSet<Tuple>>();
    poToEqclassesMap.put(ld, eqClasses);

    IdentityHashSet<Tuple> eqClass = new IdentityHashSet<Tuple>();
    eqClasses.add(eqClass);
    Illustrator illustrator;
    illustrator = new Illustrator(lineage, eqClasses, maxRecords, this, poloadToSchemaMap.get(ld), pigContext);
    ld.setIllustrator(illustrator);
    poToDataMap.put(ld, illustrator.getData());
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 17
Source file: IllustratorAttacher.java

Example 7: testLimit

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Test
public void testLimit() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lC = GenPhyOp.topLoadOp();
    php.add(lC);

    POLimit op = new POLimit(new OperatorKey("", r.nextLong()),
            -1, null);

    php.add(op);
    php.connect(lC, op);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC17.gld");
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 18
Source file: TestMRCompiler.java

Example 8: convert

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessorRdds, POLoad poLoad)
        throws IOException {
    // if (predecessors.size()!=0) {
    // throw new
    // RuntimeException("Should not have predecessors for Load. Got : "+predecessors);
    // }

    JobConf loadJobConf = SparkUtil.newJobConf(pigContext);
    configureLoader(physicalPlan, poLoad, loadJobConf);

    // don't know why but just doing this cast for now
    RDD<Tuple2<Text, Tuple>> hadoopRDD = sparkContext.newAPIHadoopFile(
            poLoad.getLFile().getFileName(), PigInputFormatSpark.class,
            Text.class, Tuple.class, loadJobConf);

    registerUdfFiles();
    // map to get just RDD<Tuple>
    return hadoopRDD.map(TO_TUPLE_FUNCTION,
            SparkUtil.getManifest(Tuple.class));
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 22
Source file: LoadConverter.java

Example 9: isPlanFetchable

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * Visits the plan with {@link FetchablePlanVisitor} and checks whether the
 * plan is fetchable.
 *
 * @param pc PigContext
 * @param pp the physical plan to be examined
 * @return true if the plan is fetchable
 * @throws VisitorException
 */
public static boolean isPlanFetchable(PigContext pc, PhysicalPlan pp) throws VisitorException {
    if (isEligible(pc, pp)) {
        FetchablePlanVisitor fpv = new FetchablePlanVisitor(pc, pp);
        fpv.visit();
        // Plan is fetchable only if FetchablePlanVisitor returns true AND
        // limit is present in the plan, i.e: limit is pushed up to the loader.
        // Limit is a safeguard. If the input is large, and there is no limit, 
        // fetch optimizer will fetch the entire input to the client. That can be dangerous.
        if (!fpv.isPlanFetchable()) {
            return false;
        }
        for (POLoad load : PlanHelper.getPhysicalOperators(pp, POLoad.class)) {
            if (load.getLimit() == -1) {
                return false;
            }
        }
        pc.getProperties().setProperty(PigImplConstants.CONVERTED_TO_FETCH, "true");
        init(pp);
        return true;
    }
    return false;
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 32
Source file: FetchOptimizer.java
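
A hypothetical call site for this check might look as follows; the surrounding launcher logic is an assumption for illustration, not taken from the Pig sources (pc is a PigContext, pp the compiled PhysicalPlan):

if (FetchOptimizer.isPlanFetchable(pc, pp)) {
    // run the plan directly on the client, e.g. via FetchLauncher (see Example 15)
} else {
    // compile and launch MapReduce jobs as usual
}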

Example 10: isEligible

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * Checks whether the plan fulfills the prerequisites needed for fetching.
 *
 * @param pc PigContext
 * @param pp the physical plan to be examined
 * @return true if the plan is eligible for fetching
 */
private static boolean isEligible(PigContext pc, PhysicalPlan pp) {
    if (!isFetchEnabled(pc)) {
        return false;
    }

    List<PhysicalOperator> roots = pp.getRoots();
    for (PhysicalOperator po : roots) {
        if (!(po instanceof POLoad)) {
            String msg = "Expected physical operator at root is POLoad. Found : "
                    + po.getClass().getCanonicalName() + ". Fetch optimizer will be disabled.";
            LOG.debug(msg);
            return false;
        }
    }

    // consider single-leaf jobs only
    int leafSize = pp.getLeaves().size();
    if (leafSize != 1) {
        LOG.debug("Expected physical plan should have one leaf. Found " + leafSize);
        return false;
    }

    return true;
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 32
Source file: FetchOptimizer.java

Example 11: okToRunLocal

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
private boolean okToRunLocal(org.apache.hadoop.mapreduce.Job job, MapReduceOper mro, List<POLoad> lds) throws IOException {
    Configuration conf = job.getConfiguration();
    if(!conf.getBoolean(PigConfiguration.PIG_AUTO_LOCAL_ENABLED, false)) {
        return false;
    }

    long inputByteMax = conf.getLong(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES, 100*1000*1000l);
    long totalInputFileSize = InputSizeReducerEstimator.getTotalInputFileSize(conf, lds, job, inputByteMax);
    log.info("Size of input: " + totalInputFileSize +" bytes. Small job threshold: " + inputByteMax );
    if (totalInputFileSize < 0 || totalInputFileSize > inputByteMax) {
        return false;
    }

    int reducers = conf.getInt(MRConfiguration.REDUCE_TASKS, 1);
    log.info("No of reducers: " + reducers);
    if (reducers > 1) {
        return false;
    }

    return true;
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 22
Source file: JobControlCompiler.java

Example 12: estimateNumberOfReducers

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * Determines the number of reducers to be used.
 *
 * @param job job instance
 * @param mapReduceOper
 * @throws java.io.IOException
 */
@Override
public int estimateNumberOfReducers(Job job, MapReduceOper mapReduceOper) throws IOException {
    Configuration conf = job.getConfiguration();

    long bytesPerReducer = conf.getLong(BYTES_PER_REDUCER_PARAM, DEFAULT_BYTES_PER_REDUCER);
    int maxReducers = conf.getInt(MAX_REDUCER_COUNT_PARAM, DEFAULT_MAX_REDUCER_COUNT_PARAM);

    List<POLoad> poLoads = PlanHelper.getPhysicalOperators(mapReduceOper.mapPlan, POLoad.class);
    long totalInputFileSize = getTotalInputFileSize(conf, poLoads, job);

    log.info("BytesPerReducer=" + bytesPerReducer + " maxReducers="
        + maxReducers + " totalInputFileSize=" + totalInputFileSize);

    // if totalInputFileSize == -1, we couldn't get the input size so we can't estimate.
    if (totalInputFileSize == -1) { return -1; }

    int reducers = (int)Math.ceil((double)totalInputFileSize / bytesPerReducer);
    reducers = Math.max(1, reducers);
    reducers = Math.min(maxReducers, reducers);

    return reducers;
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 30
Source file: InputSizeReducerEstimator.java
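
To make the arithmetic above concrete, here is a small worked sketch of the same clamped estimate; the byte figures and parameter values are illustrative assumptions, not Pig's configured defaults:

long bytesPerReducer = 1000L * 1000 * 1000;    // assume 1 GB per reducer
int maxReducers = 999;                         // assumed reducer cap
long totalInputFileSize = 2500L * 1000 * 1000; // 2.5 GB of input

int reducers = (int) Math.ceil((double) totalInputFileSize / bytesPerReducer); // = 3
reducers = Math.max(1, reducers);              // never fewer than one reducer
reducers = Math.min(maxReducers, reducers);    // never more than the cap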

Example 13: connRedOper

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * Connect the reduce MROpers to the leaf node in the map MROper mro
 * by adding appropriate loads
 * @param mergedPlans - The list of reduce MROpers
 * @param mro - The map MROper
 * @throws PlanException
 * @throws IOException
 */
private void connRedOper(List<MapReduceOper> mergedPlans, MapReduceOper mro) throws PlanException, IOException{
    PhysicalOperator leaf = null;
    List<PhysicalOperator> leaves = mro.mapPlan.getLeaves();
    if(leaves!=null && leaves.size()>0)
        leaf = leaves.get(0);

    for (MapReduceOper mmro : mergedPlans) {
        mmro.setReduceDone(true);
        FileSpec fileSpec = getTempFileSpec();
        POLoad ld = getLoad();
        ld.setLFile(fileSpec);
        POStore str = getStore();
        str.setSFile(fileSpec);
        mmro.reducePlan.addAsLeaf(str);
        mro.mapPlan.add(ld);
        if(leaf!=null)
            mro.mapPlan.connect(ld, leaf);
        MRPlan.connect(mmro, mro);
    }
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 29
Source file: MRCompiler.java

Example 14: ExampleGenerator

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public ExampleGenerator(LogicalPlan plan, PigContext hadoopPigContext) {
        this.plan = plan;
//        pigContext = new PigContext(ExecType.LOCAL, hadoopPigContext
//                .getProperties());
        pigContext = hadoopPigContext;
        // pigContext.setExecType(ExecType.LOCAL);
        FileLocalizer.setInitialized(false);
        try {
            pigContext.connect();
        } catch (ExecException e) {
            log.error("Error connecting to the cluster "
                    + e.getLocalizedMessage());

        }
        execEngine = new MRExecutionEngine(pigContext);
        localMRRunner = new LocalMapReduceSimulator();
        poLoadToSchemaMap = new HashMap<POLoad, LogicalSchema>();
    }
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 19
Source file: ExampleGenerator.java

Example 15: test6

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Test
public void test6() throws Exception {
    PigContext pc = pigServer.getPigContext();

    PhysicalPlan pp = new PhysicalPlan();
    POLoad poLoad = GenPhyOp.topLoadOp();
    pp.add(poLoad);
    POLimit poLimit = new POLimit(new OperatorKey("", r.nextLong()), -1, null);
    pp.add(poLimit);
    pp.connect(poLoad, poLimit);
    POStore poStore = GenPhyOp.topStoreOp();
    pp.addAsLeaf(poStore);

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    new FetchLauncher(pc).explain(pp, pc, ps, "xml");
    assertTrue(baos.toString().matches("(?si).*No MR jobs. Fetch only.*"));

}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 20
Source file: TestFetch.java

Example 16: createPOLoadWithSize

Upvotes: 3

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public static POLoad createPOLoadWithSize(long size, LoadFunc loadFunc) throws Exception {
    File file = File.createTempFile("tempFile", ".tmp");
    file.deleteOnExit();
    RandomAccessFile f = new RandomAccessFile(file, "rw");
    f.setLength(size);
    f.close();

    loadFunc.setLocation(file.getAbsolutePath(), new org.apache.hadoop.mapreduce.Job(CONF));
    FuncSpec funcSpec = new FuncSpec(loadFunc.getClass().getCanonicalName());
    POLoad poLoad = new POLoad(new OperatorKey(), loadFunc);
    poLoad.setLFile(new FileSpec(file.getAbsolutePath(), funcSpec));
    poLoad.setPc(new PigContext());
    poLoad.setUp();

    return poLoad;
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 17
Source file: TestJobControlCompiler.java

Example 17: extractStaticPlans

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
private void extractStaticPlans() throws PlanException, VisitorException, CloneNotSupportedException {
		for (MapReduceOper mr : mixMOP) {
//			System.out.println("MIX OP -- PROC: " + mr.getOperatorKey());
			
			List<MapReduceOper> preds = new ArrayList<MapReduceOper>();
			if (plan.getPredecessors(mr) != null) {
				preds.addAll(plan.getPredecessors(mr));
			}
			List<MapReduceOper> static_preds = new ArrayList<MapReduceOper>();
			// Look at the predecessors for static trees.
			for (MapReduceOper pre : preds) {
				if (staticMOPs.contains(pre)) {
					// Add to a list of static elements.
					static_preds.add(pre);
					// Prune this tree from the plan.
					moveToStaticPlan(pre);					
				}
			}
			
			// Pull the loads for this operator to replace them.
			List<POLoad> load_list = new ArrayList<POLoad>();
			new LoadFinder(mr.mapPlan, load_list).visit();
			
			// Add a new step to the static plan to load
			// the data into a state.
			addLoadStateOper(mr, static_preds, load_list);
			
			// Replace all the static loads with NOPs
			for (POLoad load : load_list) {
				if (staticLoads.contains(load)) {
					mr.mapPlan.replace(load, getNOP(load));					
				}
			}
		}
	}
 

Developer: JamesLampton
Project: piggybank-squeal
Lines of code: 36
Source file: StaticPlanFixer.java

Example 18: getInputSOP

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public StormOper getInputSOP(POLoad pl) {
	if (PLSpoutLink.get(pl) != null) {
		return PLSpoutLink.get(pl);
	} else {
		return rootMap.get(pl.getLFile().getFileName());
	}
}
 

Developer: JamesLampton
Project: piggybank-squeal
Lines of code: 8
Source file: SOperPlan.java

Example 19: NOPLoad

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public NOPLoad(OperatorKey k, POLoad load) {
	super(k);
	
	FileSpec newLFile = new FileSpec(load.getLFile().getFileName(), 
			new FuncSpec(NOPLoader.class.getName() + "()"));
	
	this.setLFile(newLFile);
	this.addOriginalLocation(load.getAlias(), load.getOriginalLocations());
}
 

Developer: JamesLampton
Project: piggybank-squeal
Lines of code: 10
Source file: NOPLoad.java

Example 20: visit

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Override
public void visit(LOLoad loLoad) throws FrontendException {
    String scope = DEFAULT_SCOPE;

    // The last parameter here is set to true as we assume all files are
    // splittable due to LoadStore Refactor
    POLoad load = new POLoad(new OperatorKey(scope, nodeGen
            .getNextNodeId(scope)), loLoad.getLoadFunc());
    load.addOriginalLocation(loLoad.getAlias(), loLoad.getLocation());
    load.setLFile(loLoad.getFileSpec());
    load.setPc(pc);
    load.setResultType(DataType.BAG);
    load.setSignature(loLoad.getSignature());
    load.setLimit(loLoad.getLimit());
    currentPlan.add(load);
    logToPhyMap.put(loLoad, load);

    // Load is typically a root operator, but in the multiquery
    // case it might have a store as a predecessor.
    List<Operator> op = loLoad.getPlan().getPredecessors(loLoad);
    PhysicalOperator from;

    if(op != null) {
        from = logToPhyMap.get(op.get(0));
        try {
            currentPlan.connect(from, load);
        } catch (PlanException e) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
        }
    }
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 34
Source file: LogToPhyTranslationVisitor.java

Example 21: configureLoader

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * stolen from JobControlCompiler
 * TODO: refactor it to share this
 * @param physicalPlan
 * @param poLoad
 * @param configuration
 * @return
 * @throws java.io.IOException
 */
private static Configuration configureLoader(PhysicalPlan physicalPlan,
		POLoad poLoad, Configuration configuration, PigContext pigContext) throws IOException {

	Job job = new Job(configuration);
	LoadFunc loadFunc = poLoad.getLoadFunc();

	loadFunc.setLocation(poLoad.getLFile().getFileName(), job);

	// stolen from JobControlCompiler
	ArrayList<FileSpec> pigInputs = new ArrayList<FileSpec>();
	//Store the inp filespecs
	pigInputs.add(poLoad.getLFile());

	ArrayList<List<OperatorKey>> inpTargets = Lists.newArrayList();
	ArrayList<String> inpSignatures = Lists.newArrayList();
	ArrayList<Long> inpLimits = Lists.newArrayList();
	//Store the target operators for tuples read
	//from this input
	List<PhysicalOperator> loadSuccessors = physicalPlan.getSuccessors(poLoad);
	List<OperatorKey> loadSuccessorsKeys = Lists.newArrayList();
	if(loadSuccessors!=null){
		for (PhysicalOperator loadSuccessor : loadSuccessors) {
			loadSuccessorsKeys.add(loadSuccessor.getOperatorKey());
		}
	}
	inpTargets.add(loadSuccessorsKeys);
	inpSignatures.add(poLoad.getSignature());
	inpLimits.add(poLoad.getLimit());

	configuration.set("pig.inputs", ObjectSerializer.serialize(pigInputs));
	configuration.set("pig.inpTargets", ObjectSerializer.serialize(inpTargets));
	configuration.set("pig.inpSignatures", ObjectSerializer.serialize(inpSignatures));
	configuration.set("pig.inpLimits", ObjectSerializer.serialize(inpLimits));
	configuration.set("pig.pigContext", ObjectSerializer.serialize(pigContext));
	configuration.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
	return configuration;
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 47
Source file: LoadConverter.java

Example 22: visitLoad

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Override
public void visitLoad(POLoad load) {
    // As we go through update the load ops of the tmp stores
    // that we removed with the resulting other stores output.
    FileSpec spec = replacementMap.get(load.getLFile().getFileName());
    if (spec != null) {
        load.setLFile(spec);
    }
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 10
Source file: NoopStoreRemover.java

Example 23: testCogroup2

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Test    
public void testCogroup2() throws Exception {
    String query = ("a = load 'd.txt' as (name:chararray, age:int, gpa:float);" +
    "b = group a by ( name, age );" +
    "store b into 'empty';");  
    
    LogicalPlan newLogicalPlan = buildPlan(query);
    
    PhysicalPlan phyPlan = translatePlan(newLogicalPlan);
    
    assertEquals( 1, phyPlan.getRoots().size() );
    assertEquals( POLoad.class, phyPlan.getRoots().get(0).getClass() );
    POLoad load = (POLoad)phyPlan.getRoots().get(0);
    
    assertEquals( POLocalRearrange.class, phyPlan.getSuccessors(load).get(0).getClass() );
    POLocalRearrange localR = (POLocalRearrange)phyPlan.getSuccessors(load).get(0);
    assertEquals( 1, localR.getInputs().size() );
    assertEquals( 2, localR.getPlans().size() );
    PhysicalPlan cogroupPlan = localR.getPlans().get(0);
    assertEquals( 1, cogroupPlan.getLeaves().size() );        
    assertEquals( POProject.class, cogroupPlan.getLeaves().get(0).getClass() );
    POProject prj = (POProject)cogroupPlan.getLeaves().get(0);
    assertEquals( 0, prj.getColumn() );
    assertEquals( DataType.CHARARRAY, prj.getResultType() );
    
    PhysicalPlan cogroupPlan2 = localR.getPlans().get(1);
    POProject prj2 = (POProject)cogroupPlan2.getLeaves().get(0);
    assertEquals( 1, prj2.getColumn() );
    assertEquals( DataType.INTEGER, prj2.getResultType() );
    
    assertEquals( POGlobalRearrange.class, phyPlan.getSuccessors(localR).get(0).getClass() );
    POGlobalRearrange globalR = (POGlobalRearrange)phyPlan.getSuccessors(localR).get(0);
    assertEquals( DataType.TUPLE, globalR.getResultType() );
    
    assertEquals( POPackage.class, phyPlan.getSuccessors(globalR).get(0).getClass() );
    POPackage pack = (POPackage)phyPlan.getSuccessors(globalR).get(0);
    assertEquals( DataType.TUPLE, pack.getResultType() );
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 39
Source file: TestNewPlanLogToPhyTranslationVisitor.java

Example 24: getTotalInputFileSize

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
/**
 * Get the input size for as many inputs as possible. Inputs that do not report
 * their size, and whose size Pig cannot look up itself, are excluded from this total.
 */
static long getTotalInputFileSize(Configuration conf,
                                  List<POLoad> lds, Job job) throws IOException {
    long totalInputFileSize = 0;
    boolean foundSize = false;
    for (POLoad ld : lds) {
        long size = getInputSizeFromLoader(ld, job);
        if (size > -1) { foundSize = true; }
        if (size > 0) {
            totalInputFileSize += size;
            continue;
        }
        // the input file location might be a list of comma separated files,
        // separate them out
        for (String location : LoadFunc.getPathStrings(ld.getLFile().getFileName())) {
            if (UriUtil.isHDFSFileOrLocalOrS3N(location)) {
                Path path = new Path(location);
                FileSystem fs = path.getFileSystem(conf);
                FileStatus[] status = fs.globStatus(path);
                if (status != null) {
                    for (FileStatus s : status) {
                        totalInputFileSize += Utils.getPathLength(fs, s);
                        foundSize = true;
                    }
                }
            }
        }
    }
    return foundSize ? totalInputFileSize : -1;
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 34
Source file: InputSizeReducerEstimator.java

Example 25: visitLoad

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Override
public void visitLoad(POLoad op) throws VisitorException{
    try{
        nonBlocking(op);
        phyToMROpMap.put(op, curMROp);
    }catch(Exception e){
        int errCode = 2034;
        String msg = "Error compiling operator " + op.getClass().getSimpleName();
        throw new MRCompilerException(msg, errCode, PigException.BUG, e);
    }
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 12
Source file: MRCompiler.java

Example 26: isDiamondMROper

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
private boolean isDiamondMROper(MapReduceOper mr) {
    
    // We'll remove this mr as part of diamond query optimization
    // only if this mr is a trivial one, that is, its plan
    // has either two operators (a load followed by a store) or three
    // operators (the operator between the load and store must be a
    // foreach, introduced by a casting operation).
    //
    // We won't optimize in other cases where there are more operators
    // in the plan. Otherwise those operators would run multiple times
    // in the successor MR operators, which may not give better
    // performance.
    boolean rtn = false;
    if (isMapOnly(mr)) {
        PhysicalPlan pl = mr.mapPlan;
        if (pl.size() == 2 || pl.size() == 3) {               
            PhysicalOperator root = pl.getRoots().get(0);
            PhysicalOperator leaf = pl.getLeaves().get(0);
            if (root instanceof POLoad && leaf instanceof POStore) {
                if (pl.size() == 3) {
                    PhysicalOperator mid = pl.getSuccessors(root).get(0);
                    if (mid instanceof POForEach) {
                        rtn = true;
                    }                      
                } else {
                    rtn = true;
                }
            }
        }
    }
    return rtn;
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 33
Source file: MultiQueryOptimizer.java

Example 27: IllustratorAttacher

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public IllustratorAttacher(PhysicalPlan plan, LineageTracer lineage, int maxRecords,
    Map<POLoad, LogicalSchema> poLoadToSchemaMap, PigContext hadoopPigContext) throws VisitorException {
    super(plan, new DepthFirstWalker<PhysicalOperator, PhysicalPlan>(plan));
    pigContext = hadoopPigContext;
    this.lineage = lineage;
    poToEqclassesMap = new HashMap<PhysicalOperator, Collection<IdentityHashSet<Tuple>>>();
    poToDataMap = new HashMap<PhysicalOperator, DataBag>();
    this.maxRecords = maxRecords;
    this.poloadToSchemaMap = poLoadToSchemaMap;
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 11
Source file: IllustratorAttacher.java

Example 28: testGetInputSizeFromFs

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Test
public void testGetInputSizeFromFs() throws Exception {
    long size = 2L * 1024 * 1024 * 1024;
    Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
            CONF, Lists.newArrayList(createPOLoadWithSize(size, new PigStorage())),
            new org.apache.hadoop.mapreduce.Job(CONF)));

    Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
            CONF,
            Lists.newArrayList(createPOLoadWithSize(size, new PigStorageWithStatistics())),
            new org.apache.hadoop.mapreduce.Job(CONF)));

    Assert.assertEquals(size * 2, InputSizeReducerEstimator.getTotalInputFileSize(
            CONF,
            Lists.newArrayList(
                    createPOLoadWithSize(size, new PigStorage()),
                    createPOLoadWithSize(size, new PigStorageWithStatistics())),
                    new org.apache.hadoop.mapreduce.Job(CONF)));

    // Negative test - PIG-3754
    POLoad poLoad = createPOLoadWithSize(size, new PigStorage());
    poLoad.setLFile(new FileSpec("hbase://users", null));

    Assert.assertEquals(-1, InputSizeReducerEstimator.getTotalInputFileSize(
            CONF,
            Collections.singletonList(poLoad),
            new org.apache.hadoop.mapreduce.Job(CONF)));
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 29
Source file: TestInputSizeReducerEstimator.java

Example 29: testSpl1

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Test
public void testSpl1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lA = GenPhyOp.topLoadOp();
    POSplit spl = GenPhyOp.topSplitOp();
    php.add(lA);
    php.add(spl);
    php.connect(lA, spl);

    POFilter fl1 = GenPhyOp.topFilterOp();
    POFilter fl2 = GenPhyOp.topFilterOp();
    php.add(fl1);
    php.add(fl2);
    php.connect(spl, fl1);
    php.connect(spl, fl2);

    POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
    POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();
    php.add(lr1);
    php.add(lr2);
    php.connect(fl1, lr1);
    php.connect(fl2, lr2);

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.add(gr);
    php.connect(lr1, gr);
    php.connect(lr2, gr);

    POPackage pk = GenPhyOp.topPackageOp();
    php.add(pk);
    php.connect(gr, pk);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);
    php.connect(pk, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC12.gld");

}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 40
Source file: TestMRCompiler.java

Example 30: testSim1

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Test
public void testSim1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    POLoad ld = GenPhyOp.topLoadOp();
    php.add(ld);
    PhysicalPlan grpChain1 = GenPhyOp.grpChain();
    php.merge(grpChain1);

    php.connect(ld, grpChain1.getRoots().get(0));

    PhysicalOperator leaf = php.getLeaves().get(0);

    PhysicalPlan grpChain2 = GenPhyOp.grpChain();
    php.merge(grpChain2);

    php.connect(leaf, grpChain2.getRoots().get(0));

    leaf = php.getLeaves().get(0);
    POFilter fl = GenPhyOp.topFilterOp();
    php.add(fl);

    php.connect(leaf, fl);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);

    php.connect(fl, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC1.gld");
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 30
Source file: TestMRCompiler.java

Example 31: setUpHashTable

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
private void setUpHashTable() throws IOException {
    FileSpec replFile = new FileSpec(repl, new FuncSpec(PigStorage.class.getName() + "()"));
    POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile);
    PigContext pc = new PigContext(ExecType.MAPREDUCE, PigMapReduce.sJobConfInternal.get());
    pc.connect();

    ld.setPc(pc);
    Tuple dummyTuple = null;
    for (Result res = ld.getNextTuple(); res.returnStatus != POStatus.STATUS_EOP; res = ld
            .getNextTuple()) {
        Tuple tup = (Tuple)res.result;
        LoadFunc lf = ((LoadFunc)pc.instantiateFuncFromSpec(ld.getLFile().getFuncSpec()));
        String key = lf.getLoadCaster().bytesToCharArray(
                ((DataByteArray)tup.get(keyField)).get());
        Tuple csttup = TupleFactory.getInstance().newTuple(2);
        csttup.set(0, key);
        csttup.set(1, lf.getLoadCaster().bytesToInteger(((DataByteArray)tup.get(1)).get()));
        DataBag vals = null;
        if (replTbl.containsKey(key)) {
            vals = replTbl.get(key);
        }
        else {
            vals = BagFactory.getInstance().newDefaultBag();
            replTbl.put(key, vals);
        }
        vals.add(csttup);
    }
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 29
Source file: TestFRJoin.java

Example 32: testCogroup

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Test    
public void testCogroup() throws Exception {
    String query = ("a = load 'd.txt' as (name:chararray, age:int, gpa:float);" +
    "b = group a by name;" +
    "store b into 'empty';");  
    
    LogicalPlan newLogicalPlan = buildPlan(query);
    
    PhysicalPlan phyPlan = translatePlan(newLogicalPlan);
    
    assertEquals( 1, phyPlan.getRoots().size() );
    assertEquals( POLoad.class, phyPlan.getRoots().get(0).getClass() );
    POLoad load = (POLoad)phyPlan.getRoots().get(0);
    
    assertEquals( POLocalRearrange.class, phyPlan.getSuccessors(load).get(0).getClass() );
    POLocalRearrange localR = (POLocalRearrange)phyPlan.getSuccessors(load).get(0);
    assertEquals( 1, localR.getInputs().size() );
    assertEquals( 1, localR.getPlans().size() );
    PhysicalPlan cogroupPlan = localR.getPlans().get(0);
    assertEquals( 1, cogroupPlan.getLeaves().size() );        
    assertEquals( POProject.class, cogroupPlan.getLeaves().get(0).getClass() );
    POProject prj = (POProject)cogroupPlan.getLeaves().get(0);
    assertEquals( 0, prj.getColumn() );
    assertEquals( DataType.CHARARRAY, prj.getResultType() );
    
    assertEquals( POGlobalRearrange.class, phyPlan.getSuccessors(localR).get(0).getClass() );
    POGlobalRearrange globalR = (POGlobalRearrange)phyPlan.getSuccessors(localR).get(0);
    assertEquals( DataType.TUPLE, globalR.getResultType() );
    
    assertEquals( POPackage.class, phyPlan.getSuccessors(globalR).get(0).getClass() );
    POPackage pack = (POPackage)phyPlan.getSuccessors(globalR).get(0);
    assertEquals( DataType.TUPLE, pack.getResultType() );
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 34
Source file: TestNewPlanLogToPhyTranslationVisitor.java

Example 33: areFilesSame

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
public static boolean areFilesSame(FileSpec expLocal, FileSpec actHadoop, PigContext pc) throws ExecException, IOException{
    Random r = new Random();
    
    POLoad ldExp = new POLoad(new OperatorKey("", r.nextLong()));
    ldExp.setPc(pc);
    ldExp.setLFile(expLocal);
    
    POLoad ldAct = new POLoad(new OperatorKey("", r.nextLong()));
    ldAct.setPc(pc);
    ldAct.setLFile(actHadoop);
    
    Tuple t = null;
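    // Both tuple counters below start at -1, so each ends up holding (count - 1);
    // the equality comparison at the end is unaffected since both share the offset.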
    int numActTuples = -1;
    DataBag bagAct = DefaultBagFactory.getInstance().newDefaultBag();
    Result resAct = null;
    while((resAct = ldAct.getNextTuple()).returnStatus!=POStatus.STATUS_EOP){
        ++numActTuples;
        bagAct.add(trimTuple((Tuple)resAct.result));
    }
    
    int numExpTuples = -1;
    DataBag bagExp = DefaultBagFactory.getInstance().newDefaultBag();
    Result resExp = null;
    while((resExp = ldExp.getNextTuple()).returnStatus!=POStatus.STATUS_EOP){
        ++numExpTuples;
        bagExp.add(trimTuple((Tuple)resExp.result));
    }
    
    if(numActTuples!=numExpTuples)
        return false;
    
    return compareBags(bagExp, bagAct);
}
 

Developer: sigmoidanalytics
Project: spork-streaming
Lines of code: 34
Source file: TestHelper.java

Example 34: visit

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Override
public void visit(LOLoad loLoad) throws FrontendException {
    String scope = DEFAULT_SCOPE;
    // The last parameter here is set to true as we assume all files are
    // splittable due to LoadStore Refactor
    POLoad load = new POLoad(new OperatorKey(scope, nodeGen
            .getNextNodeId(scope)), loLoad.getLoadFunc());
    load.addOriginalLocation(loLoad.getAlias(), loLoad.getLocation());
    load.setLFile(loLoad.getFileSpec());
    load.setPc(pc);
    load.setResultType(DataType.BAG);
    load.setSignature(loLoad.getSignature());
    load.setLimit(loLoad.getLimit());
    load.setIsTmpLoad(loLoad.isTmpLoad());
    load.setCacheFiles(loLoad.getLoadFunc().getCacheFiles());
    load.setShipFiles(loLoad.getLoadFunc().getShipFiles());

    currentPlan.add(load);
    logToPhyMap.put(loLoad, load);

    // Load is typically a root operator, but in the multiquery
    // case it might have a store as a predecessor.
    List<Operator> op = loLoad.getPlan().getPredecessors(loLoad);
    PhysicalOperator from;

    if(op != null) {
        from = logToPhyMap.get(op.get(0));
        try {
            currentPlan.connect(from, load);
        } catch (PlanException e) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
        }
    }
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 37
Source file: LogToPhyTranslationVisitor.java

Example 35: visitLoad

Upvotes: 2

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; // import the required package/class
@Override
public void visitLoad(POLoad ld) throws VisitorException {
    if (ld.getCacheFiles() != null) {
        cacheFiles.addAll(ld.getCacheFiles());
    }
    if (ld.getShipFiles() != null) {
        shipFiles.addAll(ld.getShipFiles());
    }
}
 

Developer: sigmoidanalytics
Project: spork
Lines of code: 10
Source file: UdfCacheShipFilesVisitor.java

