• 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏吧

Java GcsUtil类的典型用法和代码示例

java 1次浏览

本文整理汇总了Java中org.apache.beam.sdk.util.GcsUtil的典型用法代码示例。如果您正苦于以下问题:Java GcsUtil类的具体用法?Java GcsUtil怎么用?Java GcsUtil使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。

GcsUtil类属于org.apache.beam.sdk.util包,在下文中一共展示了GcsUtil类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: buildPipelineOptions

点赞 3

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Builds a {@link DataflowPipelineOptions} fixture with fixed job name, project, region and temp
 * location, an empty files-to-stage list, a mock Dataflow client and a mock {@link GcsUtil}.
 *
 * <p>The mock {@link GcsUtil} answers {@code expand} with a single-element list holding the path
 * it was asked about, and reports every bucket as accessible.
 *
 * @return the configured options
 * @throws IOException declared because the stubbed {@link GcsUtil} methods declare it
 */
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  // Answer that echoes the requested path back as the only expansion result.
  Answer<List<GcsPath>> echoRequestedPath = new Answer<List<GcsPath>>() {
    @Override
    public List<GcsPath> answer(InvocationOnMock call) throws Throwable {
      GcsPath requested = (GcsPath) call.getArguments()[0];
      return ImmutableList.of(requested);
    }
  };

  GcsUtil gcsUtilStub = mock(GcsUtil.class);
  when(gcsUtilStub.expand(any(GcsPath.class))).then(echoRequestedPath);
  when(gcsUtilStub.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setJobName("some-job-name");
  pipelineOptions.setProject("some-project");
  pipelineOptions.setRegion("some-region");

  String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();
  pipelineOptions.setTempLocation(tempLocation);

  pipelineOptions.setFilesToStage(new LinkedList<String>());
  pipelineOptions.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  pipelineOptions.setGcsUtil(gcsUtilStub);
  return pipelineOptions;
}
 

开发者ID:apache,
项目名称:beam,
代码行数:23,
代码来源:DataflowPipelineTranslatorTest.java

示例2: buildPipelineOptions

点赞 3

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Builds a {@link DataflowPipelineOptions} fixture from command-line style arguments and then
 * overrides runner, credential, job name, project, region, temp location, files to stage and
 * the {@link GcsUtil} with fixed test values.
 *
 * <p>The mock {@link GcsUtil} answers {@code expand} with a single-element list holding the path
 * it was asked about, and reports every bucket as accessible.
 *
 * @param args arguments handed to {@code PipelineOptionsFactory.fromArgs}
 * @return the configured options
 * @throws IOException declared because the stubbed {@link GcsUtil} methods declare it
 */
private static DataflowPipelineOptions buildPipelineOptions(String ... args) throws IOException {
  GcsUtil fakeGcs = mock(GcsUtil.class);

  // expand(path) simply returns [path].
  Answer<List<GcsPath>> identityExpansion = new Answer<List<GcsPath>>() {
    @Override
    public List<GcsPath> answer(InvocationOnMock call) throws Throwable {
      Object firstArgument = call.getArguments()[0];
      return ImmutableList.of((GcsPath) firstArgument);
    }
  };
  when(fakeGcs.expand(any(GcsPath.class))).then(identityExpansion);
  when(fakeGcs.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions result =
      PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
  result.setRunner(DataflowRunner.class);
  result.setGcpCredential(new TestCredential());
  result.setJobName("some-job-name");
  result.setProject("some-project");
  result.setRegion("some-region");

  GcsPath tempPath = GcsPath.fromComponents("somebucket", "some/path");
  result.setTempLocation(tempPath.toString());

  result.setFilesToStage(new LinkedList<String>());
  result.setGcsUtil(fakeGcs);
  return result;
}
 

开发者ID:apache,
项目名称:beam,
代码行数:23,
代码来源:BatchStatefulParDoOverridesTest.java

示例3: match

点赞 2

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Matches every spec and returns one {@link MatchResult} per spec, in the same order as
 * {@code specs}.
 *
 * <p>Specs are split into wildcard and non-wildcard paths, each group is matched in one call
 * ({@code matchGlobs} / {@code matchNonGlobs}), and the two result sequences are then interleaved
 * back into the original order.
 *
 * @param specs the path specifications to match
 * @return the match results, positionally aligned with {@code specs}
 * @throws IOException propagated from the helper methods this method calls
 */
@Override
protected List<MatchResult> match(List<String> specs) throws IOException {
  List<GcsPath> gcsPaths = toGcsPaths(specs);

  List<GcsPath> globs = Lists.newArrayList();
  List<GcsPath> nonGlobs = Lists.newArrayList();
  // Remembers, per input position, which group the path went into.
  List<Boolean> wildcardFlags = Lists.newArrayList();

  for (GcsPath candidate : gcsPaths) {
    boolean wildcard = GcsUtil.isWildcard(candidate);
    List<GcsPath> bucketForCandidate = wildcard ? globs : nonGlobs;
    bucketForCandidate.add(candidate);
    wildcardFlags.add(wildcard);
  }

  Iterator<MatchResult> globResults = matchGlobs(globs).iterator();
  Iterator<MatchResult> nonGlobResults = matchNonGlobs(nonGlobs).iterator();

  // Replay the recorded flags to pull results from the right group in input order.
  ImmutableList.Builder<MatchResult> merged = ImmutableList.builder();
  for (boolean wildcard : wildcardFlags) {
    if (!wildcard) {
      checkState(
          nonGlobResults.hasNext(), "Expect nonGlobsMatchResults has next: %s", nonGlobs);
      merged.add(nonGlobResults.next());
    } else {
      checkState(globResults.hasNext(), "Expect globsMatchResults has next: %s", globs);
      merged.add(globResults.next());
    }
  }

  // Both groups must be fully consumed; leftovers mean the helpers returned too many results.
  checkState(!globResults.hasNext(), "Expect no more elements in globsMatchResults.");
  checkState(!nonGlobResults.hasNext(), "Expect no more elements in nonGlobsMatchResults.");
  return merged.build();
}
 

开发者ID:apache,
项目名称:beam,
代码行数:37,
代码来源:GcsFileSystem.java

示例4: expand

点赞 2

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Expands a glob pattern into a {@link MatchResult} by listing the objects that share the
 * pattern's non-wildcard prefix, page by page, and keeping those whose names match the pattern.
 *
 * <p>Object names ending in {@code "/"} are skipped.
 *
 * @param gcsPattern the bucket and object pattern to expand
 * @return a {@link MatchResult} with status {@code OK} and the metadata of all matching objects
 * @throws IOException propagated from listing objects or building metadata
 * @throws IllegalArgumentException if {@code gcsPattern} does not contain globs.
 */
@VisibleForTesting
MatchResult expand(GcsPath gcsPattern) throws IOException {
  String objectPattern = gcsPattern.getObject();
  String prefix = GcsUtil.getNonWildcardPrefix(objectPattern);
  Pattern regex = Pattern.compile(GcsUtil.wildcardToRegexp(objectPattern));
  String bucket = gcsPattern.getBucket();

  LOG.debug(
      "matching files in bucket {}, prefix {} against pattern {}",
      bucket,
      prefix,
      regex.toString());

  List<Metadata> matched = new LinkedList<>();
  String nextPage = null;
  while (true) {
    Objects listing = options.getGcsUtil().listObjects(bucket, prefix, nextPage);
    List<StorageObject> items = listing.getItems();
    if (items == null) {
      // A page without items ends the expansion.
      break;
    }

    for (StorageObject candidate : items) {
      String name = candidate.getName();
      // Keep only names that match the regex and are not directories (trailing slash).
      if (!regex.matcher(name).matches() || name.endsWith("/")) {
        continue;
      }
      LOG.debug("Matched object: {}", name);
      matched.add(toMetadata(candidate));
    }

    nextPage = listing.getNextPageToken();
    if (nextPage == null) {
      break;
    }
  }
  return MatchResult.create(Status.OK, matched);
}
 

开发者ID:apache,
项目名称:beam,
代码行数:35,
代码来源:GcsFileSystem.java

示例5: IntegrationTestHelper

点赞 2

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
public IntegrationTestHelper() {
  assertNotNull("You must set the TEST_PROJECT environment variable for this test.", TEST_PROJECT);
  assertNotNull("You must set the TEST_OUTPUT_GCS_FOLDER environment variable for this test.", TEST_OUTPUT_GCS_FOLDER);
  assertNotNull("You must set the TEST_STAGING_GCS_FOLDER environment variable for this test.", TEST_STAGING_GCS_FOLDER);
  assertTrue("TEST_OUTPUT_GCS_FOLDER must end with '/'", TEST_OUTPUT_GCS_FOLDER.endsWith("/"));
  assertTrue("TEST_OUTPUT_GCS_FOLDER must start with 'gs://'", TEST_OUTPUT_GCS_FOLDER.startsWith("gs://"));
  assertTrue("TEST_STAGING_GCS_FOLDER must start with 'gs://'", TEST_STAGING_GCS_FOLDER.startsWith("gs://"));
  // we don't care how TEST_STAGING_GCS_FOLDER ends, so no check for it.

  gcsUtil = new GcsUtil.GcsUtilFactory().create(popts);
}
 

开发者ID:googlegenomics,
项目名称:dataflow-java,
代码行数:12,
代码来源:IntegrationTestHelper.java

示例6: getGcsUtil

点赞 2

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Returns the {@link GcsUtil} instance that should be used to communicate with Google Cloud
 * Storage.
 *
 * <p>The annotations on this getter name {@link GcsUtil.GcsUtilFactory} as the default instance
 * factory and mark the property with {@code @JsonIgnore} and {@code @Hidden}.
 *
 * @return the {@link GcsUtil} associated with these options
 */
@JsonIgnore
@Description("The GcsUtil instance that should be used to communicate with Google Cloud Storage.")
@Default.InstanceFactory(GcsUtil.GcsUtilFactory.class)
@Hidden
GcsUtil getGcsUtil();
 

开发者ID:apache,
项目名称:beam,
代码行数:9,
代码来源:GcsOptions.java

示例7: testRunWithFiles

点赞 2

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Runs a pipeline with two local files configured for staging (one of them under an overriding
 * package name) and checks the job submitted to the mocked Dataflow service: package count and
 * names, temp storage prefix, dataset and user-agent name/version.
 *
 * @throws IOException if the temporary files cannot be created
 */
@Test
public void testRunWithFiles() throws IOException {
  // Test that the function DataflowRunner.stageFiles works as expected.
  final String cloudDataflowDataset = "somedataset";

  // Create some temporary files.
  File temp1 = File.createTempFile("DataflowRunnerTest", "txt");
  temp1.deleteOnExit();
  File temp2 = File.createTempFile("DataflowRunnerTest2", "txt");
  temp2.deleteOnExit();

  // Name under which temp2 is listed in filesToStage ("alias.txt=<path>").
  String overridePackageName = "alias.txt";

  // Every getObjects lookup on the mock yields a single FileNotFoundException entry.
  // NOTE(review): presumably this makes the stager treat the packages as not yet uploaded —
  // confirm against the staging code.
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(GcsUtil.StorageObjectOrIOException.create(
          new FileNotFoundException("some/path"))));

  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setFilesToStage(ImmutableList.of(
      temp1.getAbsolutePath(),
      overridePackageName + "=" + temp2.getAbsolutePath()));
  options.setStagingLocation(VALID_STAGING_BUCKET);
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setTempDatasetId(cloudDataflowDataset);
  options.setProject(PROJECT_ID);
  options.setRegion(REGION_ID);
  options.setJobName("job");
  options.setDataflowClient(buildMockDataflow());
  options.setGcsUtil(mockGcsUtil);
  options.setGcpCredential(new TestCredential());

  // Any create(...) call on the mock gets a writable channel backed by a fresh temp file that
  // is deleted when the channel is closed.
  when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
      .then(
          new Answer<SeekableByteChannel>() {
            @Override
            public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
              return FileChannel.open(
                  Files.createTempFile("channel-", ".tmp"),
                  StandardOpenOption.CREATE,
                  StandardOpenOption.WRITE,
                  StandardOpenOption.DELETE_ON_CLOSE);
            }
          });

  Pipeline p = buildDataflowPipeline(options);

  DataflowPipelineJob job = (DataflowPipelineJob) p.run();
  assertEquals("newid", job.getJobId());

  // Capture the Job passed to the mocked jobs.create call so its contents can be inspected.
  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  Job workflowJob = jobCaptor.getValue();
  assertValidJob(workflowJob);

  // Exactly the two configured files must appear as packages of the first worker pool.
  assertEquals(
      2,
      workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().size());
  // The first package's name only has to start with temp1's file name.
  DataflowPackage workflowPackage1 =
      workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
  assertThat(workflowPackage1.getName(), startsWith(temp1.getName()));
  // The second package must carry the overriding name exactly.
  DataflowPackage workflowPackage2 =
      workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
  assertEquals(overridePackageName, workflowPackage2.getName());

  // Remaining environment fields must reflect the options set above.
  assertEquals(
      GcsPath.fromUri(VALID_TEMP_BUCKET).toResourceName(),
      workflowJob.getEnvironment().getTempStoragePrefix());
  assertEquals(
      cloudDataflowDataset,
      workflowJob.getEnvironment().getDataset());
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getName(),
      workflowJob.getEnvironment().getUserAgent().get("name"));
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getVersion(),
      workflowJob.getEnvironment().getUserAgent().get("version"));
}
 

开发者ID:apache,
项目名称:beam,
代码行数:78,
代码来源:DataflowRunnerTest.java

示例8: processElement

点赞 2

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Handles one {@code Read}: on the first element seen ({@code readCount == 0}) it opens the
 * shard output and the BAM writer, then converts the read to a {@code SAMRecord} and appends it
 * to the writer.
 *
 * <p>A read whose alignment start is lower than that of the previously written read is logged,
 * counted as out of order and dropped.
 *
 * @param c the process context supplying the element and the header side input
 * @param window the window of the element; stored in a field
 * @throws Exception propagated from any of the calls made here
 */
@ProcessElement
public void processElement(DoFn<Read, String>.ProcessContext c, BoundedWindow window)
    throws Exception {

  this.window = window;

  // Fetch the header side input once and reuse it for later elements.
  if (headerInfo == null) {
    headerInfo = c.sideInput(headerView);
  }
  final Read read = c.element();

  // First element: derive the shard identity from this read and open the writer.
  // readCount is also incremented for dropped out-of-order reads, so this runs only once
  // for as long as readCount is not reset elsewhere.
  if (readCount == 0) {

    shardContig = KeyReadsFn.shardKeyForRead(read, 1);
    sequenceIndex = headerInfo.header.getSequenceIndex(shardContig.referenceName);
    final boolean isFirstShard = headerInfo.shardHasFirstRead(shardContig);
    final String outputFileName = options.getOutput();
    // Shard name: <output>-<12-digit sequence index>-<reference name>:<12-digit start>.
    shardName = outputFileName + "-" + String.format("%012d", sequenceIndex) + "-"
        + shardContig.referenceName
        + ":" + String.format("%012d", shardContig.start);
    LOG.info("Writing shard file " + shardName);
    // NOTE(review): the content type passed here is BAM_INDEX_FILE_MIME_TYPE although the
    // stream is fed to a BAM writer — confirm this is intended.
    final OutputStream outputStream =
        Channels.newOutputStream(
            new GcsUtil.GcsUtilFactory().create(options)
              .create(GcsPath.fromUri(shardName),
                  BAMIO.BAM_INDEX_FILE_MIME_TYPE));
    // NOTE(review): presumably withholds the trailing empty gzip block so that shards can be
    // concatenated later — confirm against TruncatedOutputStream.
    ts = new TruncatedOutputStream(
        outputStream, BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
    bw = new BAMBlockWriter(ts, null /*file*/);
    bw.setSortOrder(headerInfo.header.getSortOrder(), true);
    bw.setHeader(headerInfo.header);
    // Only the shard holding the first read writes the header.
    if (isFirstShard) {
      LOG.info("First shard - writing header to " + shardName);
      bw.writeHeader(headerInfo.header);
    }
  }
  SAMRecord samRecord = ReadUtils.makeSAMRecord(read, headerInfo.header);
  // Out-of-order read: log it, count it, and return without writing it.
  if (prevRead != null && prevRead.getAlignmentStart() > samRecord.getAlignmentStart()) {
    LOG.info("Out of order read " + prevRead.getAlignmentStart() + " " +
        samRecord.getAlignmentStart() + " during writing of shard " + shardName +
        " after processing " + readCount + " reads, min seen alignment is " +
        minAlignment + " and max is " + maxAlignment + ", this read is " +
        (samRecord.getReadUnmappedFlag() ? "unmapped" : "mapped") + " and its mate is " +
        (samRecord.getMateUnmappedFlag() ? "unmapped" : "mapped"));
    Metrics.counter(WriteBAMFn.class, "Out of order reads").inc();
    readCount++;
    hadOutOfOrder = true;
    return;
  }
  // Track the alignment range seen so far; these values use the record's alignment start
  // before the unmapped-read adjustment below.
  minAlignment = Math.min(minAlignment, samRecord.getAlignmentStart());
  maxAlignment = Math.max(maxAlignment, samRecord.getAlignmentStart());
  prevRead = samRecord;
  if (samRecord.getReadUnmappedFlag()) {
    // An unmapped read with a mapped mate takes over the mate's reference name and start.
    if (!samRecord.getMateUnmappedFlag()) {
      samRecord.setReferenceName(samRecord.getMateReferenceName());
      samRecord.setAlignmentStart(samRecord.getMateAlignmentStart());
    }
    unmappedReadCount++;
  }
  bw.addAlignment(samRecord);
  readCount++;
}
 

开发者ID:googlegenomics,
项目名称:dataflow-java,
代码行数:63,
代码来源:WriteBAMFn.java

示例9: setGcsUtil

点赞 1

import org.apache.beam.sdk.util.GcsUtil; //导入依赖的package包/类
/**
 * Setter for the {@link GcsUtil} option; only the declaration is visible here.
 *
 * @param value the {@link GcsUtil} instance to use
 */
void setGcsUtil(GcsUtil value); 

开发者ID:apache,
项目名称:beam,
代码行数:2,
代码来源:GcsOptions.java


版权声明:本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系管理员进行删除。
喜欢 (0)