% paper.bib -- bibliography file from the CountESS-Project/fqfa repository.

% PeerJ preprint describing the pyfaidx module for indexed FASTA access.
% The title uses LaTeX quote ligatures so the quotation marks typeset as an
% opening/closing pair instead of two closing marks.
@techreport{shirley_efficient_2015,
	author      = {Shirley, Matthew D. and Ma, Zhaorong and Pedersen, Brent S. and Wheelan, Sarah J.},
	title       = {Efficient ``pythonic'' access to {FASTA} files using pyfaidx},
	institution = {PeerJ Inc.},
	number      = {e1196},
	year        = {2015},
	month       = apr,
	doi         = {10.7287/peerj.preprints.970v1},
	url         = {https://peerj.com/preprints/970},
	urldate     = {2020-02-03},
	language    = {en},
	abstract    = {The pyfaidx Python module provides memory and time-efficient indexing, subsetting, and in-place modification of subsequences of FASTA files. pyfaidx provides Python classes that expose a dictionary interface where sequences from an indexed FASTA can be accessed by their header name and then sliced by position without reading the full file into memory. pyfaidx includes an extensive test suite to ensure correct and reproducible behavior. A command-line program (faidx) is also provided as an alternative interface, with significant enhancements to functionality, while maintaining full index file compatibility with samtools. The pyfaidx module is installable from PyPI (https://pypi.python.org/pypi/pyfaidx), and development versions can be found at Github (https://github.com/mdshw5/pyfaidx).},
}

% Software citation: the lmdu/pyfastx GitHub repository.
@misc{du_lmdupyfastx_2019,
	author    = {Du, Lianming},
	title     = {lmdu/pyfastx},
	year      = {2019},
	month     = mar,
	url       = {https://github.com/lmdu/pyfastx},
	urldate   = {2020-02-03},
	note      = {original-date: 2019-03-19T13:55:46Z},
	copyright = {MIT},
	keywords  = {assembly, biology, dna, fasta, genome, python, sequence},
	abstract  = {a python package for fast random access to sequences from plain and gzipped FASTA/Q files},
}

% Software citation: the brentp/pyfasta GitHub repository.
@misc{pedersen_brentppyfasta_2010,
	author   = {Pedersen, Brent},
	title    = {brentp/pyfasta},
	year     = {2010},
	month    = jul,
	url      = {https://github.com/brentp/pyfasta},
	urldate  = {2020-02-03},
	note     = {original-date: 2010-07-14T22:45:27Z},
	abstract = {fast, memory-efficient, pythonic (and command-line) access to fasta sequence files},
}

% Biopython application note in Bioinformatics 25(11).
% NOTE(review): the abstract field is cut off mid-word ("wide ran"); restore
% the full text from the publisher page if the abstract is ever printed.
@article{cock_biopython_2009,
	author     = {Cock, Peter J. A. and Antao, Tiago and Chang, Jeffrey T. and Chapman, Brad A. and Cox, Cymon J. and Dalke, Andrew and Friedberg, Iddo and Hamelryck, Thomas and Kauff, Frank and Wilczynski, Bartek and de Hoon, Michiel J. L.},
	title      = {Biopython: freely available {Python} tools for computational molecular biology and bioinformatics},
	shorttitle = {Biopython},
	journal    = {Bioinformatics},
	volume     = {25},
	number     = {11},
	pages      = {1422--1423},
	year       = {2009},
	month      = jun,
	issn       = {1367-4803},
	doi        = {10.1093/bioinformatics/btp163},
	url        = {https://academic.oup.com/bioinformatics/article/25/11/1422/330687},
	urldate    = {2020-02-03},
	language   = {en},
	abstract   = {Abstract.  Summary: The Biopython project is a mature open source international collaboration of volunteer developers, providing Python libraries for a wide ran},
}

% Software citation: the biocore/scikit-bio GitHub repository.
% The author is a team name, double-braced so it is treated as one unit.
@misc{noauthor_biocorescikit-bio_2013,
	author    = {{scikit-bio Development Team}},
	title     = {biocore/scikit-bio},
	publisher = {biocore},
	year      = {2013},
	month     = dec,
	url       = {https://github.com/biocore/scikit-bio},
	urldate   = {2020-02-03},
	note      = {original-date: 2013-12-13T16:24:41Z},
	abstract  = {scikit-bio is an open-source, BSD-licensed, Python package providing data structures, algorithms, and educational resources for bioinformatics.},
}

% FASTQ format description in Nucleic Acids Research 38(6).
% NOTE(review): the abstract field is truncated (ends in "desp"); restore the
% full text from the publisher page if the abstract is ever printed.
@article{cock_sanger_2010,
	author   = {Cock, Peter J. A. and Fields, Christopher J. and Goto, Naohisa and Heuer, Michael L. and Rice, Peter M.},
	title    = {The {Sanger} {FASTQ} file format for sequences with quality scores, and the {Solexa}/{Illumina} {FASTQ} variants},
	journal  = {Nucleic Acids Research},
	volume   = {38},
	number   = {6},
	pages    = {1767--1771},
	year     = {2010},
	month    = apr,
	issn     = {0305-1048},
	doi      = {10.1093/nar/gkp1137},
	url      = {https://academic.oup.com/nar/article/38/6/1767/3112533},
	urldate  = {2020-02-03},
	language = {en},
	abstract = {ABSTRACT.  FASTQ has emerged as a common file format for sharing sequencing read data combining both the sequence and an associated per base quality score, desp},
}

% Pearson and Lipman's FASTA paper in PNAS 85(8).
% Given names are stored in full so the bibliography style decides whether to
% abbreviate them; quotation marks in the abstract use LaTeX quote ligatures.
@article{pearson_improved_1988,
	author   = {Pearson, William R. and Lipman, David J.},
	title    = {Improved tools for biological sequence comparison},
	journal  = {Proceedings of the National Academy of Sciences},
	volume   = {85},
	number   = {8},
	pages    = {2444--2448},
	year     = {1988},
	month    = apr,
	issn     = {0027-8424, 1091-6490},
	doi      = {10.1073/pnas.85.8.2444},
	pmid     = {3162770},
	url      = {https://www.pnas.org/content/85/8/2444},
	urldate  = {2020-02-03},
	language = {en},
	abstract = {We have developed three computer programs for comparisons of protein and DNA sequences. They can be used to search sequence data bases, evaluate similarity scores, and identify periodic structures based on local sequence similarity. The FASTA program is a more sensitive derivative of the FASTP program, which can be used to search protein or DNA sequence data bases and can compare a protein sequence to a DNA sequence data base by translating the DNA data base as it is searched. FASTA includes an additional step in the calculation of the initial pairwise similarity score that allows multiple regions of similarity to be joined to increase the score of related sequences. The RDF2 program can be used to evaluate the significance of similarity scores using a shuffling method that preserves local sequence composition. The LFASTA program can display all the regions of local similarity between two sequences with scores greater than a threshold, using the same scoring parameters and a similar alignment algorithm; these local similarities can be displayed as a ``graphic matrix'' plot or as individual alignments. In addition, these programs have been generalized to allow comparison of DNA or protein sequences based on a variety of alternative scoring matrices.},
}

% Software citation: the sanger-pathogens/Fastaq GitHub repository.
@misc{hunt_sanger-pathogensfastaq_2013,
	author    = {Hunt, Martin},
	title     = {sanger-pathogens/{Fastaq}},
	publisher = {Pathogen Informatics, Wellcome Sanger Institute},
	year      = {2013},
	month     = sep,
	url       = {https://github.com/sanger-pathogens/Fastaq},
	urldate   = {2020-02-19},
	note      = {original-date: 2013-09-06T11:54:45Z},
	copyright = {GPL-3.0},
	keywords  = {bioinformatics, genomics, global-health, infectious-diseases, next-generation-sequencing, pathogen, research, sequencing},
	abstract  = {Python3 scripts to manipulate FASTA and FASTQ files},
}


% Software citation: the ericvsmith/dataclasses GitHub repository.
% NOTE(review): the citation key ends in 2020 while the year field is 2017;
% the key is left unchanged because documents cite the entry by it.
@misc{smith_ericvsmithdataclasses_2020,
	author    = {Smith, Eric V.},
	title     = {ericvsmith/dataclasses},
	year      = {2017},
	month     = may,
	url       = {https://github.com/ericvsmith/dataclasses},
	urldate   = {2020-02-27},
	note      = {original-date: 2017-05-19T20:46:27Z},
	copyright = {Apache-2.0},
	abstract  = {Contribute to ericvsmith/dataclasses development by creating an account on GitHub.},
}

% Jupyter Notebooks paper, a contribution to an edited proceedings volume.
% Uses the inproceedings type (conference is only an alias for it), stores the
% press in the publisher field, protects the proper noun in the title from
% sentence-casing, and writes the page range with an en-dash.
@inproceedings{Kluyver:2016aa,
	author    = {Kluyver, Thomas and Ragan-Kelley, Benjamin and P{\'e}rez, Fernando and Granger, Brian and Bussonnier, Matthias and Frederic, Jonathan and Kelley, Kyle and Hamrick, Jessica and Grout, Jason and Corlay, Sylvain and Ivanov, Paul and Avila, Dami{\'a}n and Abdalla, Safia and Willing, Carol},
	title     = {{Jupyter} {Notebooks} -- a publishing format for reproducible computational workflows},
	booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas},
	editor    = {Loizides, F. and Schmidt, B.},
	publisher = {IOS Press},
	pages     = {87--90},
	year      = {2016},
}