Skip to content

Commit f68e9ae

Browse files
authored
Merge branch 'dev' into dev
2 parents 3cd4a7f + d07fd1b commit f68e9ae

File tree

5 files changed

+76
-60
lines changed

5 files changed

+76
-60
lines changed

GeUtilities.Tests/GeUtilities.Tests.csproj

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
<RepositoryUrl>https://github.com/Genometric/GeUtilities</RepositoryUrl>
1919
<Version>4.0.0</Version>
2020
<Description>Implements unit test functions for the GeUtilities.</Description>
21+
<PackageTags>genomics; genome analysis; building-blocks; parser; BED; VCF; GTF; RefSeq;</PackageTags>
22+
<PackageReleaseNotes>A major overhaul of the interfaces, function signatures, and namespace naming. Namely:
23+
- Parser constructors take the least possible information, and all other configuration can be set on an instance of the parser class.
24+
- By moving the source file name from the constructor to the Parse function, a single instance of Parser can now be used to parse multiple files.
25+
- A major overhaul of class inheritance with the objective of making the classes more coherent.</PackageReleaseNotes>
2126
</PropertyGroup>
2227

2328
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">

GeUtilities/GeUtilities.csproj

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@
1414
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
1515
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
1616
<PackageId>Genometric.GeUtilities</PackageId>
17-
<Description>This package implements a set of tools required to develop any genome data analysis program.</Description>
18-
<PackageReleaseNotes>A major overhaul on the interfaces, and function and name space naming.</PackageReleaseNotes>
17+
<Description>Genome Utilities (GeUtilities) provides open-source building-blocks for genomic data analysis tools.</Description>
18+
<PackageReleaseNotes>A major overhaul of the interfaces, function signatures, and namespace naming. Namely:
19+
- Parser constructors take the least possible information, and all other configuration can be set on an instance of the parser class.
20+
- By moving the source file name from the constructor to the Parse function, a single instance of Parser can now be used to parse multiple files.
21+
- A major overhaul of class inheritance with the objective of making the classes more coherent.</PackageReleaseNotes>
22+
<PackageTags>genomics; genome analysis; building-blocks; parser; BED; VCF; GTF; RefSeq;</PackageTags>
1923
</PropertyGroup>
2024

2125
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">

GeUtilities/IntervalParsers/Parser.cs

Lines changed: 57 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -214,70 +214,70 @@ private void Parse()
214214

215215
while ((line = fileReader.ReadLine()) != null)
216216
{
217-
lineCounter++;
217+
if (++lineCounter > MaxLinesToRead) break;
218+
218219
lineSize += fileReader.CurrentEncoding.GetByteCount(line);
219220
Status = (Math.Round((lineSize * 100.0) / fileSize, 0)).ToString();
220221

221-
if (line.Trim().Length > 0 && lineCounter <= MaxLinesToRead)
222+
if (line.Trim().Length <= 0) continue;
223+
224+
DropReadingPeak = false;
225+
string[] splittedLine = line.Split(Delimiter);
226+
227+
if (!(_leftColumn < splittedLine.Length && int.TryParse(splittedLine[_leftColumn], out left)))
228+
{
229+
DropLine("\tLine " + lineCounter.ToString() + "\t:\tInvalid start/position column number");
230+
continue;
231+
}
232+
233+
if (_rightColumn >= 0 && !(_rightColumn < splittedLine.Length && int.TryParse(splittedLine[_rightColumn], out right)))
234+
{
235+
DropLine("\tLine " + lineCounter.ToString() + "\t:\tInvalid stop column number");
236+
continue;
237+
}
238+
239+
I readingInterval = BuildInterval(left, right, splittedLine, lineCounter);
240+
if (DropReadingPeak)
241+
continue;
242+
243+
chrName = null;
244+
if (_chrColumn < splittedLine.Length)
222245
{
223-
DropReadingPeak = false;
224-
string[] splittedLine = line.Split(Delimiter);
225-
226-
if (!(_leftColumn < splittedLine.Length && int.TryParse(splittedLine[_leftColumn], out left)))
227-
{
228-
DropLine("\tLine " + lineCounter.ToString() + "\t:\tInvalid start/position column number");
229-
continue;
230-
}
231-
232-
if (_rightColumn >= 0 && !(_rightColumn < splittedLine.Length && int.TryParse(splittedLine[_rightColumn], out right)))
233-
{
234-
DropLine("\tLine " + lineCounter.ToString() + "\t:\tInvalid stop column number");
235-
continue;
236-
}
237-
238-
I readingInterval = BuildInterval(left, right, splittedLine, lineCounter);
239-
if (DropReadingPeak)
240-
continue;
241-
242-
chrName = null;
243-
if (_chrColumn < splittedLine.Length)
244-
{
245-
if (Regex.IsMatch(splittedLine[_chrColumn].ToLower(), "chr"))
246-
chrName = splittedLine[_chrColumn];
247-
else if (int.TryParse(splittedLine[_chrColumn], out int chrNumber))
248-
chrName = "chr" + chrNumber;
249-
else if (_assemblyData.ContainsKey("chr" + splittedLine[_chrColumn]))
250-
chrName = "chr" + splittedLine[_chrColumn];
251-
else
252-
chrName = splittedLine[_chrColumn];
253-
if (ReadOnlyAssemblyChrs && !_assemblyData.ContainsKey(chrName))
254-
chrName = null;
255-
}
256-
if (chrName == null)
257-
{
258-
DropLine("\tLine " + lineCounter.ToString() + "\t:\tInvalid chromosome number ( " + splittedLine[_chrColumn].ToString() + " )");
259-
continue;
260-
}
246+
if (Regex.IsMatch(splittedLine[_chrColumn].ToLower(), "chr"))
247+
chrName = splittedLine[_chrColumn];
248+
else if (int.TryParse(splittedLine[_chrColumn], out int chrNumber))
249+
chrName = "chr" + chrNumber;
250+
else if (_assemblyData.ContainsKey("chr" + splittedLine[_chrColumn]))
251+
chrName = "chr" + splittedLine[_chrColumn];
252+
else
253+
chrName = splittedLine[_chrColumn];
254+
if (ReadOnlyAssemblyChrs && !_assemblyData.ContainsKey(chrName))
255+
chrName = null;
256+
}
257+
if (chrName == null)
258+
{
259+
DropLine("\tLine " + lineCounter.ToString() + "\t:\tInvalid chromosome number ( " + splittedLine[_chrColumn].ToString() + " )");
260+
continue;
261+
}
261262

263+
strand = '*';
264+
if (_strandColumn != -1 && _strandColumn < line.Length &&
265+
(char.TryParse(splittedLine[_strandColumn], out strand) && strand != '+' && strand != '-' && strand != '*'))
262266
strand = '*';
263-
if (_strandColumn != -1 && _strandColumn < line.Length &&
264-
(char.TryParse(splittedLine[_strandColumn], out strand) && strand != '+' && strand != '-' && strand != '*'))
265-
strand = '*';
266-
267-
switch (HashFunction)
268-
{
269-
case HashFunctions.FNV:
270-
readingInterval.HashKey = FNVHashFunction(readingInterval, lineCounter);
271-
break;
272-
273-
default:
274-
readingInterval.HashKey = OneAtATimeHashFunction(readingInterval, lineCounter);
275-
break;
276-
}
277-
278-
_data.Add(readingInterval, chrName, strand);
279-
_data.IntervalsCount++;
267+
268+
switch (HashFunction)
269+
{
270+
case HashFunctions.FNV:
271+
readingInterval.HashKey = FNVHashFunction(readingInterval, lineCounter);
272+
break;
273+
274+
default:
275+
readingInterval.HashKey = OneAtATimeHashFunction(readingInterval, lineCounter);
276+
break;
280277
}
278+
279+
_data.Add(readingInterval, chrName, strand);
280+
_data.IntervalsCount++;
281281
}
282282
}
283283
}

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# GeUtilities
1+
# GeUtilities
2+
[![Latest version](https://img.shields.io/nuget/v/Genometric.GeUtilities.svg?style=for-the-badge)](https://www.nuget.org/packages/Genometric.GeUtilities/)
23

34
Genome Utilities (GeUtilities) provides open-source building-blocks for genomic data analysis tools. The following components are currently implemented:
45
- **IGenomics**: interfaces to build portable objects. For instance, ChIP-seq peaks, variations, or general features.

appveyor.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ for:
6161
$env:PATH = 'C:\msys64\usr\bin;' + $env:PATH
6262
Invoke-WebRequest -Uri 'https://codecov.io/bash' -OutFile codecov.sh
6363
bash codecov.sh -f "GeUtilities.Tests/GeUtilities_coverage.xml" -t $env:codecovtoken
64+
- MSBuild.SonarQube.Runner.exe begin /k:"geutilities" /o:"genometric" /d:"sonar.host.url=https://sonarcloud.io" /d:"sonar.login=%sonartoken%" /d:sonar.cs.opencover.reportsPaths="\GeUtilities.Tests\GeUtilities_coverage.xml"
65+
- MSBuild.exe /t:Rebuild
66+
- ps: cd GeUtilities.Tests
67+
- OpenCover.Console.exe -register:"user" -target:"C:/Program Files/dotnet/dotnet.exe" -targetargs:"xunit -noshadow" -filter:"+[*]*" -oldStyle -output:"GeUtilities_coverage.xml"
68+
- ps: cd ..
69+
- MSBuild.SonarQube.Runner.exe end /d:"sonar.login=%sonartoken%"
6470

6571
artifacts:
6672
- path: GeUtilities/bin/Release/Genometric.GeUtilities*.nupkg

0 commit comments

Comments
 (0)