Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/branch_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v7

- name: Create branches and PRs
uses: LabKey/gitHubActions/branch-release@develop
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,13 @@ jobs:
LK_VERSION=$(cat /home/runner/work/_temp/_github_home/lkDist/release.txt | sed 's/-SNAPSHOT//g')
cd /home/runner/work/_temp/_github_home/labkey_build/${LK_VERSION}/server/server/modules/DiscvrLabKeyModules
COUNT=$(gh release list | grep 'latest' | wc -l)
if [ $COUNT != '0' ];then gh release delete 'latest' --cleanup-tag -y; fi
if [ $COUNT != '0' ];then
echo 'Deleting existing release'
gh release delete 'latest' --cleanup-tag -y;
fi
git push -f origin "latest"
git push -f origin 'latest'
sleep 100 # gh release periodically fails due to a missing tag.
gh release create 'latest' --verify-tag --generate-notes --prerelease --title "Development Build: ${{ env.DEFAULT_BRANCH }}"
gh release upload 'latest' /home/runner/work/_temp/_github_home/lkDist/discvr/DISCVR-*
2 changes: 1 addition & 1 deletion .github/workflows/merge_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v7

- name: Merge PR
uses: LabKey/gitHubActions/merge-release@develop
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.labkey.api.data.CompareType;
import org.labkey.api.data.Container;
import org.labkey.api.data.ContainerManager;
import org.labkey.api.data.DbSchema;
import org.labkey.api.data.DbSchemaType;
import org.labkey.api.data.SimpleFilter;
import org.labkey.api.data.Sort;
import org.labkey.api.data.TableInfo;
Expand All @@ -16,7 +18,9 @@
import org.labkey.api.exp.api.ExpRun;
import org.labkey.api.exp.api.ExperimentService;
import org.labkey.api.ldk.LDKService;
import org.labkey.api.pipeline.CancelledException;
import org.labkey.api.pipeline.PipeRoot;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.PipelineService;
import org.labkey.api.pipeline.PipelineStatusFile;
Expand Down Expand Up @@ -57,6 +61,9 @@
*/
public class SequenceAnalysisMaintenanceTask implements MaintenanceTask
{
private static final String SYSTEM_MAINTENANCE_DESCRIPTION = "System Maintenance";
private static final String JOB_TABLE = "statusfiles";

public SequenceAnalysisMaintenanceTask()
{

Expand All @@ -74,6 +81,31 @@ public String getName()
return "DeleteSequenceAnalysisArtifacts";
}

// NOTE: if there is a more direct way to locate the JobID this hack should be replaced
private void checkJobCancelled(Logger log)
{
// Make the assumption there is only one active maintenance job at a time:
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("description"), SYSTEM_MAINTENANCE_DESCRIPTION).
addCondition(FieldKey.fromString("container"), ContainerManager.getRoot().getId()).
addCondition(FieldKey.fromString("modified"), new Date(), CompareType.DATE_EQUAL);
int rowId = new TableSelector(DbSchema.get("pipeline", DbSchemaType.Module).getTable(JOB_TABLE), PageFlowUtil.set("RowId", "Status"), filter, null).getMapCollection().stream().filter(map -> {
String val = String.valueOf(map.get("status"));
return val != null && (val.toLowerCase().startsWith(PipelineJob.TaskStatus.cancelling.name()) || val.toLowerCase().startsWith(PipelineJob.TaskStatus.running.name()));
}).map(rs -> Integer.parseInt(String.valueOf(rs.get("rowid")))).max(Integer::compareTo).orElse(-1);

if (rowId == -1)
{
log.warn("Unable to find rowId for job", new Exception("Unable to find rowId for job"));
return;
}

PipelineStatusFile sf = PipelineService.get().getStatusFile(rowId);
if (PipelineJob.TaskStatus.cancelling.name().equalsIgnoreCase(sf.getStatus()))
{
throw new CancelledException();
}
}

@Override
public void run(Logger log)
{
Expand Down Expand Up @@ -158,6 +190,7 @@ private void verifySequenceDataPresent(Logger log)
if (i % 1000 == 0)
{
log.info("readdata " + i + " of " + readDatas.size() + ". Current container: " + ContainerManager.getForId(rd.getContainer()).getPath());
checkJobCancelled(log);
}

if (rd.getFileId1() != null)
Expand Down Expand Up @@ -221,6 +254,7 @@ else if (!d.getFile().exists())
if (i % 1000 == 0)
{
log.info("analysis " + i + " of " + analyses.size() + ". Current container: " + ContainerManager.getForId(m.getContainer()).getPath());
checkJobCancelled(log);
}

if (m.getAlignmentFile() != null)
Expand Down Expand Up @@ -296,7 +330,11 @@ else if (sf.getFilePath() == null)
private void processContainer(Container c, Logger log) throws IOException, PipelineJobException
{
if (!c.isWorkbook())
{
log.info("processing container: " + c.getPath());
}

checkJobCancelled(log);

PipeRoot root = PipelineService.get().getPipelineRootSetting(c);
if (root != null && !root.isCloudRoot())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@
import org.labkey.sequenceanalysis.run.preprocessing.FastqcProcessingStep;
import org.labkey.sequenceanalysis.run.preprocessing.FilterReadsStep;
import org.labkey.sequenceanalysis.run.preprocessing.FlashPipelineStep;
import org.labkey.sequenceanalysis.run.preprocessing.Kraken2Step;
import org.labkey.sequenceanalysis.run.preprocessing.PrintReadsContainingStep;
import org.labkey.sequenceanalysis.run.preprocessing.TagPcrSummaryStep;
import org.labkey.sequenceanalysis.run.preprocessing.TrimmomaticWrapper;
Expand Down Expand Up @@ -291,7 +292,7 @@ public static void registerPipelineSteps()
SequencePipelineService.get().registerPipelineStep(new CutadaptWrapper.Provider());
SequencePipelineService.get().registerPipelineStep(new FastqcProcessingStep.Provider());
SequencePipelineService.get().registerPipelineStep(new CutadaptCropWrapper.Provider());
//SequencePipelineService.get().registerPipelineStep(new BlastFilterPipelineStep.Provider());
SequencePipelineService.get().registerPipelineStep(new Kraken2Step.Provider());

//ref library
SequencePipelineService.get().registerPipelineStep(new DNAReferenceLibraryStep.Provider());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
package org.labkey.sequenceanalysis.run.preprocessing;

import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.json.JSONObject;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.PipelineJobService;
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.PreprocessingStep;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
import org.labkey.api.util.Compress;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.Pair;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class Kraken2Step extends AbstractCommandPipelineStep<Kraken2Step.Kraken2Wrapper> implements PreprocessingStep
{
private static final String DB_PARAM = "db";
private static final String MODE_PARAM = "mode";

public Kraken2Step(PipelineStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx, new Kraken2Wrapper(ctx.getLogger()));
}

public static class Provider extends AbstractPipelineStepProvider<PreprocessingStep>
{
public Provider()
{
super("Kraken2", "Kraken2", "Kraken2", "This step aligns input reads against a reference using BWA-mem and will only return read pairs without a passing hit in either read.", Arrays.asList(
ToolParameterDescriptor.create(DB_PARAM, "Database", "This determines the DB for positive or negative selection", "ldk-simplecombo", new JSONObject(){{
put("storeValues", "kraken2_bv;kraken2_standard");
put("multiSelect", false);
put("allowBlank", false);
put("joinReturnValue", true);
put("delimiter", ";");
}}, "kraken2_bv"),
ToolParameterDescriptor.create(MODE_PARAM, "Reads To Retain", "This determines which set of reads is passed to the next step. If 'Retain Classified' is selected, then reads matching the DB are retained. if 'Retain Unclassified' is selected, then reads that do not match the DB are retained", "ldk-simplecombo", new JSONObject(){{
put("storeValues", "Classified;Unclassified");
put("multiSelect", false);
put("allowBlank", false);
put("joinReturnValue", true);
put("delimiter", ";");
}}, null),
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--memory-mapping"), "memoryMapping", "Memory Mapping", "If checked, the DB will not be read into memory, reducing RAM", "checkbox", null, false),
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--minimum-hit-groups"), "minimumHitGroups", "Minimum Hit Groups", "Minimum number of hit groups (overlapping k-mers sharing the same minimizer) needed to make a call", "ldk-integerfield", new JSONObject(){{
put("minValue", 0);
}}, 2),
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--confidence"), "confidence", "Confidence", "Confidence score threshold (0-1)", "ldk-numberfield", new JSONObject(){{
put("minValue", 0);
put("maxValue", 1);
put("decimalPrecision", 2);
}}, 0)
), null, "https://github.com/DerrickWood/kraken2");
}

@Override
public Kraken2Step create(PipelineContext context)
{
return new Kraken2Step(this, context);
}
}

@Override
public Output processInputFile(File inputFile, @Nullable File inputFile2, File outputDir) throws PipelineJobException
{
PreprocessingOutputImpl output = new PreprocessingOutputImpl(inputFile, inputFile2);

List<String> args = new ArrayList<>();
args.add(getWrapper().getExe().getPath());

if (inputFile2 != null)
{
args.add("--paired");
}

if (inputFile.getName().toLowerCase().endsWith(".gz"))
{
args.add("--gzip-compressed");
}

Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
if (threads != null)
{
args.add("--threads");
args.add(threads.toString());
}

String dbName = getProvider().getParameterByName(DB_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
if (dbName == null)
{
throw new PipelineJobException("Missing DB name");
}

File binDir = FileUtil.appendName(new File(PipelineJobService.get().getAppProperties().getToolsDirectory()), "kraken2_dbs");
if (!binDir.exists())
{
throw new PipelineJobException("Unable to find kraken2 DB dir, expected: " + binDir.getAbsolutePath());
}

File dbDir = FileUtil.appendName(binDir, dbName);
if (!dbDir.exists())
{
throw new PipelineJobException("Unable to find kraken2 DB dir, expected: " + dbDir.getAbsolutePath());
}

args.add("--use-names");

args.add("--db");
args.add(dbDir.getAbsolutePath());

args.addAll(getClientCommandArgs());

args.add("--output");
args.add("-");

String mode = getProvider().getParameterByName(MODE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);

File classifiedOutputBase = FileUtil.appendName(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(inputFile.getName()) + ".classified");
File unclassifiedOutputBase = FileUtil.appendName(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(inputFile.getName()) + ".unclassified");
if ("Classified".equals(mode))
{
args.add("--classified-out");
args.add(classifiedOutputBase.getPath() + "#.fq");
}
else
{
args.add("--unclassified-out");
args.add(unclassifiedOutputBase.getPath() + "#.fq");
}

File reportFile = FileUtil.appendName(outputDir, SequencePipelineService.get().getUnzippedBaseName(inputFile.getName()) + ".kraken2.report.txt");
args.add("--report");
args.add(reportFile.getPath());

args.add(inputFile.getPath());
if (inputFile2 != null)
{
args.add(inputFile2.getPath());
}

getWrapper().execute(args);

if ("Classified".equals(mode))
{
File classified1 = new File(classifiedOutputBase.getPath() + "_1.fq");
File classified2 = inputFile2 == null ? null : new File(classifiedOutputBase.getPath() + "_2.fq");
if (!classified1.exists())
{
throw new PipelineJobException("Classified file does not exist: " + classified1.getAbsolutePath());
}

File compressed1 = Compress.compressGzip(classified1);
output.addIntermediateFile(classified1);

File compressed2 = classified2 == null ? null : Compress.compressGzip(classified2);
if (classified2 != null)
{
output.addIntermediateFile(classified2);
}

output.setProcessedFastq(Pair.of(compressed1, compressed2));
}
else
{
File unclassified1 = new File(unclassifiedOutputBase.getPath() + "_1.fq");
File unclassified2 = inputFile2 == null ? null : new File(unclassifiedOutputBase.getPath() + "_2.fq");
if (!unclassified1.exists())
{
throw new PipelineJobException("Unclassified file does not exist: " + unclassified1.getAbsolutePath());
}

File compressed1 = Compress.compressGzip(unclassified1);
output.addIntermediateFile(unclassified1);

File compressed2 = unclassified2 == null ? null : Compress.compressGzip(unclassified2);
if (unclassified2 != null)
{
output.addIntermediateFile(unclassified2);
}

output.setProcessedFastq(Pair.of(compressed1, compressed2));
}

return output;
}

public static class Kraken2Wrapper extends AbstractCommandWrapper
{
public Kraken2Wrapper(Logger log)
{
super(log);
}

public File getExe()
{
return SimpleScriptWrapper.resolveFileInPath("kraken2", null, true);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ private void analysisPanelTest() throws Exception
waitForElementToDisappear(Ext4Helper.Locators.window("Add Steps"));

Map<String, Ext4CmpRef> fieldsetMap = new HashMap<>();
String[] setNames = {"Adapter Trimming (Trimmomatic)", "Average Quality Filter", "Crop Reads", "Downsample Reads", "Filter Reads Matching Reference", "Head Crop", "Quality Trimming (Adaptive)", "Quality Trimming (Sliding Window)", "Read Length Filter"};
String[] setNames = {"Adapter Trimming (Trimmomatic)", "Average Quality Filter", "Crop Reads", "Downsample Reads", "Filter Reads Matching Reference", "Head Crop", "Quality Trimming (Adaptive)", "Quality Trimming (Sliding Window)", "Read Length Filter", "Kraken2"};
isPresentInThisOrder(setNames);

for (String name : setNames)
Expand All @@ -628,6 +628,9 @@ private void analysisPanelTest() throws Exception
waitAndClick(Locator.id(fieldsetMap.get("Head Crop").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a")));
waitForElementToDisappear(Locator.id(fieldsetMap.get("Head Crop").getId()));

waitAndClick(Locator.id(fieldsetMap.get("Kraken2").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a")));
waitForElementToDisappear(Locator.id(fieldsetMap.get("Kraken2").getId()));

Integer overlapLength = 6;
Double errorRate = 0.2;
Integer cropLength = 500;
Expand Down
Loading