# 'Speeches' data.frame: one row per speech (speaker name + speech date).
# Bound to `Speeches` because the join code further down refers to it by
# that name.
Speeches <- structure(
  list(
    # Factor codes index into .Label: 2L is "AAA" (9 rows), 1L is "BBB"
    # (11 rows).
    Name = structure(
      rep(c(2L, 1L), times = c(9L, 11L)),
      .Label = c("BBB", "AAA"),
      class = "factor"
    ),
    # Date values are stored as day counts since 1970-01-01.
    Date = structure(
      c(
        12543, 12404, 12404, 12404, 12373, 12362, 12345, 12320, 12207,
        15450, 15449, 15449, 15449, 15449, 15449, 15449, 15449, 15448,
        15448, 15448
      ),
      class = "Date"
    )
  ),
  .Names = c("Name", "Date"),
  row.names = c(
    "1", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8",
    "2", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", "2.9",
    "2.10"
  ),
  class = "data.frame"
)
# 'History' data.frame: one row per role held by a person, with the value of
# that role and the date range [Role.Start, Role.End] attached to it.
# Bound to `History` because the join code further down refers to it by
# that name.
History <- structure(
  list(
    # Factor codes index into .Label: 2L is "AAA" (6 rows), 1L is "BBB"
    # (7 rows).
    Name = structure(
      rep(c(2L, 1L), times = c(6L, 7L)),
      .Label = c("BBB", "AAA"),
      class = "factor"
    ),
    # Only the first four role levels occur in the data; the remaining
    # levels are kept so the factor matches the original dput() output.
    Role = structure(
      c(1L, 2L, 3L, 3L, 3L, 4L, 1L, 2L, 3L, 3L, 3L, 3L, 4L),
      .Label = c(
        "Political groups", "National parties", "Member", "Substitute",
        "Vice-Chair", "Chair", "Vice-President", "Quaestor", "President",
        "Co-President"
      ),
      class = "factor"
    ),
    Value = structure(
      c(10L, 12L, 6L, 3L, 8L, 4L, 9L, 11L, 1L, 7L, 1L, 2L, 5L),
      .Label = c(
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o"
      ),
      class = "factor"
    ),
    # Start and end of each role, stored as day counts since 1970-01-01.
    Role.Start = structure(
      c(
        12149, 12149, 12150, 12150, 12152, 12150, 14439, 14439, 14441,
        14503, 15358, 15411, 14441
      ),
      class = "Date"
    ),
    Role.End = structure(
      c(
        12618, 12618, 12618, 12618, 12538, 12618, 15507, 15507, 15357,
        15507, 15410, 15507, 15357
      ),
      class = "Date"
    )
  ),
  .Names = c("Name", "Role", "Value", "Role.Start", "Role.End"),
  row.names = c(NA, 13L),
  class = "data.frame"
)
我面临着许多困难。
1)虽然在演讲和历史数据中都有日期信息,但在第一个数据中,我有每个条目的具体日期,在第二个数据中有一个日期范围。理想情况下,我希望能够合并,以便每个演讲条目都与演讲者(姓名)和演讲日期所属的历史条目匹配。
2)期望的输出是一个 data.frame 或 data.table,其行数等于 Speeches data.frame 中的观测数,并包含 Name、Date 以及每个 Role 对应的列(这些列由 Value 填充)。然而,对于给定的演讲者,某些角色在给定的日期会出现多次,因此我需要能够为这些情况创建多个列(例如 Member.1、Member.2、Member.3)。期望的输出如下:
# Desired output (dput of the expected result): one row per row of the
# 'Speeches' data.frame, keeping Name and Date and adding one column per role
# filled with that role's value. A role that occurs several times for the same
# speaker and date is spread over numbered columns (Member.1, Member.2,
# Member.3); NA marks a role with no value for that speech.
structure(list(Name = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("BBB",
"AAA"), class = "factor"), Date = structure(c(12543, 12404, 12404,
12404, 12373, 12362, 12345, 12320, 12207, 15450, 15449, 15449,
15449, 15449, 15449, 15449, 15449, 15448, 15448, 15448), class = "Date"),
`Political groups` = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("i",
"j"), class = "factor"), `National parties` = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("k", "l"), class = "factor"),
Member.1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("f",
"g"), class = "factor"), Member.2 = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("b", "c"), class = "factor"), Member.3 = structure(c(NA,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), .Label = "h", class = "factor"), Substitute = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), .Label = "d", class = "factor")), .Names = c("Name",
"Date", "Political groups", "National parties", "Member.1", "Member.2",
"Member.3", "Substitute"), row.names = c("1", "1.1", "1.2", "1.3",
"1.4", "1.5", "1.6", "1.7", "1.8", "2", "2.1", "2.2", "2.3",
"2.4", "2.5", "2.6", "2.7", "2.8", "2.9", "2.10"), class = "data.frame")
任何关于如何改进这个问题的帮助和/或评论都是欢迎的!
更新:data.table v1.9.3+ 现已实现重叠连接(overlap join)。本例是其中的一种特殊情况:Speeches 中每条记录的开始日期和结束日期相同。我们可以使用 `foverlaps()` 来完成这项工作,如下所示:
# Overlap join with data.table::foverlaps() (needs data.table 1.9.3+).
# Each speech is treated as a zero-length interval [Date, Date2] and matched
# against the [Role.Start, Role.End] intervals of the same Name in History.
# library() is used instead of require() so that a missing package stops the
# script immediately instead of failing later with an obscure error.
library(data.table) ## 1.9.3+

# Convert both data.frames to data.tables by reference.
setDT(Speeches)
setDT(History)

# Date2 duplicates Date so that every speech has an interval end; id records
# the row number so each speech keeps its own row in the final cast.
# Both columns are added to Speeches by reference and remain there afterwards.
Speeches[, `:=`(Date2 = Date, id = .I)]

# The lookup table is keyed with the interval start and end as the last two
# key columns, as foverlaps() expects.
setkey(History, Name, Role.Start, Role.End)
ans <- foverlaps(
  Speeches, History,
  by.x = c("Name", "Date", "Date2")
)[, Date2 := NULL]

# Number the repeated roles of each speech (ordered by Value) so that several
# simultaneous values of one role end up in separate columns.
ans <- ans[order(id, Value)][
  , N := seq_len(.N), by = list(Name, Date, Role, id)
]

# Wide format: one row per speech, one column per Role/occurrence pair.
ans <- dcast.data.table(ans, id + Name + Date ~ Role + N, value.var = "Value")
这是范围/区间连接(range/interval join)的一种情况。以下是 `data.table` 的做法,它使用两个滚动连接(rolling join):
# Range join implemented with two rolling joins (needs data.table 1.9.2+).
# library() is used instead of require() so that a missing package stops the
# script immediately.
library(data.table) ## 1.9.2+

dt1 <- as.data.table(Speeches)
dt2 <- as.data.table(History)

# First rolling join - to get end indices: with dt2 keyed on Role.Start,
# roll = Inf rolls the last start on or before each speech date forward.
setkey(dt2, Name, Role.Start)
tmp1 <- dt2[dt1, roll = Inf, which = TRUE]

# Second rolling join - to get start indices: with dt2 keyed on Role.End,
# roll = -Inf rolls the next end on or after each speech date backward.
setkey(dt2, Name, Role.End)
tmp2 <- dt2[dt1, roll = -Inf, which = TRUE]

# NOTE(review): tmp1 was computed while dt2 was ordered by Role.Start and tmp2
# after re-keying on Role.End; the arithmetic below presumably relies on the
# matching rows being contiguous in the final (Role.End) order - confirm
# before reusing this on other data.

# Generate dt1's and dt2's corresponding row indices.
# idx holds the number of History rows matched by each speech.
idx <- tmp1 - tmp2 + 1L
idx1 <- rep(seq_len(nrow(dt1)), idx)
# For speech i: tmp2[i], tmp2[i] + 1, ..., tmp2[i] + idx[i] - 1.
# Built with base R so the code does not depend on the unexported
# data.table:::vecseq() helper.
idx2 <- rep(tmp2, idx) + sequence(idx) - 1L

dt1[, id := seq_len(.N)] ## needed for casting later

# Subset using idx1 and idx2 and bind them colwise; dt2's first column (Name)
# is dropped because dt1 already provides it.
ans <- cbind(dt1[idx1], dt2[idx2, -1L, with = FALSE])

# A little reordering to get the output correctly (factors are a pain!):
# number repeated roles within each speech, ordered by Value.
ans <- ans[order(id, Value)][
  , N := seq_len(.N), by = list(Name, Date, Role, id)
]

# Finally cast them: one row per speech, one column per Role/occurrence pair.
f_ans <- dcast.data.table(
  ans, id + Name + Date ~ Role + N,
  value.var = "Value"
)
id Name Date Political groups_1 National parties_1 Member_1 Member_2 Member_3 Substitute_1
1: 1 AAA 2004-05-05 j l c f NA d
2: 2 AAA 2003-12-18 j l c f h d
3: 3 AAA 2003-12-18 j l c f h d
4: 4 AAA 2003-12-18 j l c f h d
5: 5 AAA 2003-11-17 j l c f h d
6: 6 AAA 2003-11-06 j l c f h d
7: 7 AAA 2003-10-20 j l c f h d
8: 8 AAA 2003-09-25 j l c f h d
9: 9 AAA 2003-06-04 j l c f h d
10: 10 BBB 2012-04-20 i k b g NA NA
11: 11 BBB 2012-04-19 i k b g NA NA
12: 12 BBB 2012-04-19 i k b g NA NA
13: 13 BBB 2012-04-19 i k b g NA NA
14: 14 BBB 2012-04-19 i k b g NA NA
15: 15 BBB 2012-04-19 i k b g NA NA
16: 16 BBB 2012-04-19 i k b g NA NA
17: 17 BBB 2012-04-19 i k b g NA NA
18: 18 BBB 2012-04-18 i k b g NA NA
19: 19 BBB 2012-04-18 i k b g NA NA
20: 20 BBB 2012-04-18 i k b g NA NA
# Alternative: compute the overlaps with Bioconductor's GenomicRanges and then
# reshape exactly as in the rolling-join solution.
# library() is used instead of require() so that a missing package stops the
# script immediately.
library(GenomicRanges)
library(data.table)

dt1 <- as.data.table(Speeches)
dt2 <- as.data.table(History)

# Name is passed as the sequence name and the dates, converted to their
# numeric day counts, as range coordinates. A speech is a width-one range
# [Date, Date]; a role is the range [Role.Start, Role.End].
gr1 <- GRanges(
  Rle(dt1$Name),
  IRanges(as.numeric(dt1$Date), as.numeric(dt1$Date))
)
gr2 <- GRanges(
  Rle(dt2$Name),
  IRanges(as.numeric(dt2$Role.Start), as.numeric(dt2$Role.End))
)

# type = "within": keep the pairs where the speech range lies inside the role
# range. idx1 / idx2 are the matching row indices into dt1 / dt2.
olaps <- findOverlaps(gr1, gr2, type = "within")
idx1 <- queryHits(olaps)
idx2 <- subjectHits(olaps)

# From here on the steps are the same as in the rolling-join solution. They
# are written out in place of the original `...` placeholders, which are not
# valid top-level R and left `ans` undefined for this section.
dt1[, id := seq_len(.N)]

# Bind the matched rows colwise; dt2's first column (Name) is dropped because
# dt1 already provides it.
ans <- cbind(dt1[idx1], dt2[idx2, -1L, with = FALSE])

# Number repeated roles within each speech, ordered by Value.
ans <- ans[order(id, Value)][
  , N := seq_len(.N), by = list(Name, Date, Role, id)
]

# One row per speech, one column per Role/occurrence pair.
dcast.data.table(ans, id + Name + Date ~ Role + N, value.var = "Value")
我有一张表,上面有房间的开始和结束日期。我写了这个SQL: 这些房间在这段时间内是有效的。但是,当我这样过滤的时候, 我没有结果。这些日期就像一个数组: 所以,你可以看到那个房间在2022-12-30是活动的,但我不能得到它。我怎么能那样做? 希望我能很好地表达自己。
问题内容: 我需要填充一个表,该表将存储2个给定日期之间的日期范围:09/01/11-10/10/11 因此,在这种情况下,该表将从2011年9月1日开始存储,直到每天存储到10/10/11,我想知道在SQL Server中是否有一种巧妙的方法-我目前正在使用SQL Server 2008 。 谢谢 问题答案: 在SQL 2005+上很容易;如果您有数字表或理货表,操作会更容易。我在下面伪造了它:
我在一个表(即TAB1)中有多个日期范围,如下所示。 对上面的SQL查询有什么建议吗?
问题内容: 我的数据库中有以下一组匹配日期的日期(dd / MM / yyyy): 事件具有开始和结束日期(时间无关紧要),并且endDate为NULL表示事件仍在进行中。 我想确定的是两个任意日期之间的日期范围,其中a)没有事件,b)事件重叠。 因此,对于输入日期范围01/04/2009-30/06/2009,我希望得到以下结果: 注意,作为结果,两个相邻的重叠范围是可以接受的。 谁能用SQL算
问题内容: 我正在使用MySQL,并且具有下表: 我希望能够生成这样的报告,其中在过去4周内完成了各个周期: 或最近3个月内: 有什么想法可以进行选择查询以生成等效的日期范围和点击次数吗? 问题答案:
本文向大家介绍coldfusion 日期或时间范围,包括了coldfusion 日期或时间范围的使用技巧和注意事项,需要的朋友参考一下 示例 日期或时间范围的示例。