replaceAt {Biostrings} | R Documentation |
Extract/replace arbitrary substrings from/in a string or set of strings.
Description
extractAt extracts multiple subsequences from XString object x, or from the individual sequences of XStringSet object x, at the ranges of positions specified through at.
replaceAt performs multiple subsequence replacements (a.k.a. substitutions) in XString object x, or in the individual sequences of XStringSet object x, at the ranges of positions specified through at.
Usage
extractAt(x, at)
replaceAt(x, at, value="")
Arguments
x |
An XString or XStringSet object. |
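at |
Typically an IntegerRanges object if x is an XString object, and an IntegerRangesList object if x is an XStringSet object.
Alternatively, the ranges can be specified with only 1 number
per range (its start position), in which case they are considered
to be empty ranges (a.k.a. zero-width ranges). So if at is a numeric vector, an IntegerList object, or a list of numeric vectors, each number in it is interpreted as the start position of a zero-width range. This is useful when using replaceAt to perform insertions.
The following applies only if x is an XStringSet object: at is recycled to the length of x if necessary, so a single IntegerRanges object (or numeric vector) specifies the same ranges for all the sequences in x.
As a special case, at can be of length 0 (e.g. NULL) together with a value of length 0, in which case replaceAt returns x unmodified (no-op). |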
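value |
The replacement sequences.
If x is an XString object, value is typically a character vector or an XStringSet object that is recycled to the length of at (if necessary).
If x is an XStringSet object, value is typically a list of character vectors, or a CharacterList or XStringSetList object, that is recycled to the effective shape of at (if necessary).
As a special case, replaceAt accepts at and value to both be of length 0, in which case it just returns x unmodified (no-op). |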
Value
For extractAt: An XStringSet object of the same length as at if x is an XString object.
An XStringSetList object of the same length as x (and same effective shape as at) if x is an XStringSet object.
For replaceAt: An object of the same class as x.
If x is an XStringSet object, its length, names, and metadata columns are preserved.
Note
Like subseq (defined and documented in the XVector package), extractAt does not copy the sequence data!
extractAt is equivalent to extractList (defined and documented in the IRanges package) when x is an XString object and at is an IntegerRanges object.
Author(s)
H. Pagès
See Also
The subseq and subseq<- functions in the XVector package for simpler forms of subsequence extractions and replacements.
The extractList and unstrsplit functions defined and documented in the IRanges package.
The replaceLetterAt function for a DNA-specific single-letter replacement function useful for SNP injections.
The padAndClip function for padding and clipping strings.
The XString, XStringSet, and XStringSetList classes.
The IntegerRanges, IntegerRangesList, IntegerList, and CharacterList classes defined and documented in the IRanges package.
Examples
## ---------------------------------------------------------------------
## (A) ON AN XString OBJECT
## ---------------------------------------------------------------------
## 'x' is a 13-letter BString. All the ranges used in this section are
## expressed as positions in 'x'.
x <- BString("abcdefghijklm")
## 5 ranges of width 3, with start positions listed in decreasing order
## (5, 4, 3, 2, 1).
at1 <- IRanges(5:1, width=3)
extractAt(x, at1)
## Same extraction after naming the ranges "V", "W", "X", "Y", "Z".
names(at1) <- LETTERS[22:26]
extractAt(x, at1)
## 'at2' holds the ranges [1,3], [5,4] (zero-width) and [12,12], named
## "X", "Y", "Z". It is used both in its original and in reversed order.
at2 <- IRanges(c(1, 5, 12), c(3, 4, 12), names=c("X", "Y", "Z"))
extractAt(x, at2)
extractAt(x, rev(at2))
## One replacement string per range in 'at2'.
value <- c("+", "-", "*")
replaceAt(x, at2, value=value)
replaceAt(x, rev(at2), value=rev(value))
## 'at3' contains one range of width 10, [1,10], and 5 zero-width ranges
## (end = start - 1) starting at positions 14, 1, 1, 1 and 11. Three of
## the zero-width ranges share start position 1: as the 2 expected
## results below show, their relative order in 'at' determines the order
## in which their replacement values end up in the result.
at3 <- IRanges(c(14, 1, 1, 1, 1, 11), c(13, 0, 10, 0, 0, 10))
value <- 1:6
replaceAt(x, at3, value=value) # "24536klm1"
replaceAt(x, rev(at3), value=rev(value)) # "54236klm1"
## Deletions:
## ('value' is not supplied so it takes its default, "", which removes
## the letters covered by each range.)
stopifnot(replaceAt(x, at2) == "defghijkm")
stopifnot(replaceAt(x, rev(at2)) == "defghijkm")
stopifnot(replaceAt(x, at3) == "klm")
stopifnot(replaceAt(x, rev(at3)) == "klm")
## Insertions:
## (Zero-width ranges are insertion points. Passing the bare start
## positions instead of the zero-width IRanges gives the same result.)
at4 <- IRanges(c(6, 10, 2, 5), width=0)
stopifnot(replaceAt(x, at4, value="-") == "a-bcd-e-fghi-jklm")
stopifnot(replaceAt(x, start(at4), value="-") == "a-bcd-e-fghi-jklm")
at5 <- c(5, 1, 6, 5) # 2 insertions before position 5
replaceAt(x, at5, value=c("+", "-", "*", "/"))
## No-ops:
## ('at' and 'value' both NULL, or each range replaced by the
## subsequence it already contains, must give back 'x'.)
stopifnot(replaceAt(x, NULL, value=NULL) == x)
stopifnot(replaceAt(x, at2, value=extractAt(x, at2)) == x)
stopifnot(replaceAt(x, at3, value=extractAt(x, at3)) == x)
stopifnot(replaceAt(x, at4, value=extractAt(x, at4)) == x)
stopifnot(replaceAt(x, at5, value=extractAt(x, at5)) == x)
## The order of successive transformations matters:
## T1: insert "+" before position 1 and 4
## T2: insert "-" before position 3
## (Positions passed to the 2nd call of a sequence refer to the already
## modified string, which is why the 3 results differ.)
## T1 followed by T2
x2a <- replaceAt(x, c(1, 4), value="+") # expected: "+abc+defghijklm"
x3a <- replaceAt(x2a, 3, value="-") # expected: "+a-bc+defghijklm"
## T2 followed by T1
x2b <- replaceAt(x, 3, value="-") # expected: "ab-cdefghijklm"
x3b <- replaceAt(x2b, c(1, 4), value="+") # expected: "+ab-+cdefghijklm"
## T1 and T2 simultaneously:
x3c <- replaceAt(x, c(1, 3, 4), value=c("+", "-", "+")) # expected: "+ab-c+defghijklm"
## ==> 'x3a', 'x3b', and 'x3c' are all different!
## Append "**" to 'x3c':
## (Start position 'length(x3c) + 1L' is one past the last letter.)
replaceAt(x3c, length(x3c) + 1L, value="**")
## ---------------------------------------------------------------------
## (B) ON AN XStringSet OBJECT
## ---------------------------------------------------------------------
## 'x' is now a set of 3 named sequences of widths 4, 11 and 3.
x <- BStringSet(c(seq1="ABCD", seq2="abcdefghijk", seq3="XYZ"))
## A single IRanges object (positions 1 and 3, width 1) is supplied for
## the whole set.
## NOTE(review): presumably the same 2 ranges are used for every
## sequence in 'x' -- confirm against the description of 'at'.
at6 <- IRanges(c(1, 3), width=1)
extractAt(x, at=at6)
## Pass the extracted pieces to unstrsplit() (IRanges package).
## NOTE(review): presumably this pastes the pieces of each sequence back
## into one string per sequence -- see ?unstrsplit.
unstrsplit(extractAt(x, at=at6))
## 'at7' has one IRanges per sequence in 'x' (3 list elements holding 2,
## 4 and 1 ranges). [1,0], [7,6] and [12,11] are zero-width ranges.
at7 <- IRangesList(IRanges(c(2, 1), c(3, 0)),
IRanges(c(7, 2, 12, 7), c(6, 5, 11, 8)),
IRanges(2, 2))
## Set inner names on 'at7'.
## (Flatten the list, name the 7 ranges "rg01" to "rg07", then restore
## the original list structure with relist().)
unlisted_at7 <- unlist(at7)
names(unlisted_at7) <-
paste0("rg", sprintf("%02d", seq_along(unlisted_at7)))
at7 <- relist(unlisted_at7, at7)
extractAt(x, at7) # same as 'as(mapply(extractAt, x, at7), "List")'
extractAt(x, at7[3]) # same as 'as(mapply(extractAt, x, at7[3]), "List")'
replaceAt(x, at7, value=extractAt(x, at7)) # no-op
replaceAt(x, at7) # deletions
## 'at8' is mostly made of zero-width ranges (insertion points). The
## only non-empty ranges are [8,9] in the 2nd element and [2,2] in the
## 3rd element.
at8 <- IRangesList(IRanges(1:5, width=0),
IRanges(c(6, 8, 10, 7, 2, 5),
width=c(0, 2, 0, 0, 0, 0)),
IRanges(c(1, 2, 1), width=c(0, 1, 0)))
## A single replacement string supplied for all the ranges in 'at8'.
replaceAt(x, at8, value="-")
## One distinct label ("[1]", "[2]", ...) per range, arranged in a list
## with the same structure as 'at8'.
value8 <- relist(paste0("[", seq_along(unlist(at8)), "]"), at8)
replaceAt(x, at8, value=value8)
## A list-like 'value' with 3 elements of length 1.
## NOTE(review): presumably one replacement string per sequence, reused
## for all the ranges of that sequence -- confirm.
replaceAt(x, at8, value=as(c("+", "-", "*"), "List"))
## Append "**" to all sequences:
## ('width(x) + 1L' is one past the last letter of each sequence; 'as(,
## "List")' turns it into one start position per sequence.)
replaceAt(x, as(width(x) + 1L, "List"), value="**")
## ---------------------------------------------------------------------
## (C) ADVANCED EXAMPLES
## ---------------------------------------------------------------------
## These examples need the 'hgu95av2probe' data package to be installed.
library(hgu95av2probe)
probes <- DNAStringSet(hgu95av2probe)
## Split the probes in 5-mer chunks:
## (5 successive ranges of width 5.)
## NOTE(review): assumes every probe is at least 25 nucleotides long --
## TODO confirm.
at <- successiveIRanges(rep(5, 5))
extractAt(probes, at)
## Replace base 13 by its complement:
## (Extract base 13 of every probe, complement the extracted letters in
## one go on the unlisted object, then relist() them back into the shape
## of 'base13' so they can be used as 'value'.)
at <- IRanges(13, width=1)
base13 <- extractAt(probes, at)
base13comp <- relist(complement(unlist(base13)), base13)
replaceAt(probes, at, value=base13comp)
## See ?xscat for a more efficient way to do this.
## Replace all the occurrences of a given pattern with another pattern:
## ('fixed=FALSE' is used with a pattern containing the ambiguity letter
## "V"; the 2nd sanity check below treats "V" as "[ACG]".)
midx <- vmatchPattern("VCGTT", probes, fixed=FALSE)
matches <- extractAt(probes, midx)
unlist(matches)
unique(unlist(matches))
probes2 <- replaceAt(probes, midx, value="-++-")
## See strings with 2 or more substitutions:
probes2[elementNROWS(midx) >= 2]
## 2 sanity checks:
## (1) putting the matched subsequences back in place is a no-op;
## (2) the result agrees with a plain regular expression replacement on
## the probes converted to character strings.
stopifnot(all(replaceAt(probes, midx, value=matches) == probes))
probes2b <- gsub("[ACG]CGTT", "-++-", as.character(probes))
stopifnot(identical(as.character(probes2), probes2b))