POJ 1743 Musical Theme(后缀数组)

孙佑运

2023-12-01

A musical melody is represented as a sequence of N (1<=N<=20000)notes that are integers in the range 1..88, each representing a key on the piano. It is unfortunate but true that this representation of melodies ignores the notion of musical timing; but, this programming task is about notes and not timings.
Many composers structure their music around a repeating "theme", which, being a subsequence of an entire melody, is a sequence of integers in our representation. A subsequence of a melody is a theme if it:

is at least five notes long
appears (potentially transposed -- see below) again somewhere else in the piece of music
is disjoint from (i.e., non-overlapping with) at least one of its other appearance(s)

Transposed means that a constant positive or negative value is added to every note value in the theme subsequence.
Given a melody, compute the length (number of notes) of the longest theme.
One second time limit for this problem's solutions!

Input

The input contains several test cases. The first line of each test case contains the integer N. The following n integers represent the sequence of notes.
The last test case is followed by one zero.

Output

For each test case, the output file should contain a single line with a single integer that represents the length of the longest theme. If there are no themes, output 0.

Sample Input

30
25 27 30 34 39 45 52 60 69 79 69 60 52 45 39 34 30 26 22 18
82 78 74 70 66 67 64 60 65 80
0

Sample Output

5

Hint

Use scanf instead of cin to reduce the read time.

题意:

现在有一个序列, 求最长的重复转调序列, 重复的转调可以理解为两个序列的元素前后差值相等

若最长主题的长度不小于5则输出该长度，否则输出0。

思路以及做法:

1.考虑先对原串T处理，即做差处理得到S

2.对新得到的串S做后缀排序求出height, 二分长度在一段height内判断，是否出现重复即可。

PS: "段"指的是排名相邻、且拥有相同前缀的一组后缀；正因为它们的排名相邻，它们的前缀才会有相同的部分。

例如：

aaabcddx

其后缀排名

①aaabcddx

②aabcddx

③abcddx

仅仅对于a字符, 在这段中因为他们的排名相邻，即意味着前缀也有相同的部分

假设: 我们二分的答案为k = 1

①与②的LCP ≥ 1

sa[2] - sa[1] >= 1

即是出现了重复

新段？:

到了下一个后缀

height[4] < 1

这时新的一段开始。

具体看代码吧

#include <iostream>
//#include <unordered_map>
#include <time.h>
#include <algorithm>
#include <stdio.h>
#include <string.h>
using namespace std;
// Shorthand aliases for the 64-bit integer types.
typedef long long ll;
typedef unsigned long long ull;

// Debug helpers. When LOCAL is defined:
//   debug(x) prints "[<function>: <expression text> = <value>]" to cout;
//   TIME     prints the value of clock() to cout and evaluates to 0
//            (the label says "ms", but clock() itself reports clock ticks).
// Without LOCAL, TIME is just the constant 0 and debug is not defined.
#ifdef LOCAL
#define debug(x) cout << "[" __FUNCTION__ ": " #x " = " << (x) << "]\n"
#define TIME cout << "RuningTime: " << clock() << "ms\n", 0
#else
#define TIME 0
#endif
// NOTE(review): hash_ is not referenced by the code visible in this section.
#define hash_ 1000000009
// Run statement x, then `continue` / `break` the enclosing loop.
#define Continue(x) { x; continue; }
#define Break(x) { x; break; }
// NOTE(review): mod, INF and LINF are not referenced by the code visible in
// this section (fpow takes its own parameter that is also named mod).
const int mod = 1e9 + 7;
// Capacity of the global note buffer str[] and of every work array in struct SA.
const int N = 2e5 + 10;
const int INF = 0x3f3f3f3f;
const ll LINF = 0x3f3f3f3f3f3f3f3f;
// gc: fetch the next byte from the static buffer declared inside read(),
// refilling it from stdin with fread() when it is exhausted; yields EOF when
// no more input is available. The expansion carries its own trailing ';'.
#define gc p1 == p2 && (p2 = (p1 = buf) + fread(buf, 1, 1000000, stdin), p1 == p2) ? EOF : *p1++;
/*
 * Buffered integer reader for stdin.
 *
 * Skips every byte that is neither '-' nor a decimal digit, then parses an
 * optional '-' followed by a run of decimal digits.
 *
 * Returns the parsed value, or 0 if the end of input is reached before any
 * digit is found.
 *
 * The current byte is kept in an int and every loop tests for EOF, so running
 * out of input terminates the scan instead of spinning forever on a
 * truncated char.
 */
inline int read()
{
	static char buf[1000000], *p1 = buf, *p2 = buf;
	int x = 0;
	bool sgn = false;
	int ch = gc;

	// Skip separators; stop at a sign, a digit, or end of input.
	while (ch != EOF && ch != '-' && (ch < '0' || ch > '9'))
	{
		ch = gc;
	}
	if (ch == '-')
	{
		sgn = true;
		ch = gc;
	}
	// Accumulate digits: x * 10 + digit, written as (x << 1) + (x << 3).
	while (ch != EOF && ch >= '0' && ch <= '9')
	{
		x = (x << 1) + (x << 3) + (ch ^ 48);
		ch = gc;
	}
	return sgn ? -x : x;
}
/*
 * Modular exponentiation by repeated squaring.
 *
 * a   - base (reduced modulo `mod` before use, so large bases cannot
 *       overflow the intermediate products)
 * b   - exponent; values <= 0 are treated as exponent 0
 * mod - modulus, expected to be non-zero (division by zero otherwise)
 *
 * Returns a^b mod `mod`. For exponent 0 the result is 1 % mod, so a modulus
 * of 1 correctly yields 0. A negative base yields a result with the sign
 * C/C++ `%` gives it.
 */
long long fpow(long long a, int b, int mod)
{
	long long res = 1 % mod;
	a %= mod;
	for (; b > 0; b >>= 1)
	{
		if (b & 1)
			res = res * a % mod;
		a = a * a % mod;
	}
	return res;
}
// NOTE(review): the global MX is not read or written by any code visible in this section.
int MX;
// 1-based sequence buffer. main() reads the notes into str[1..n] and then
// overwrites str[1..n-1] with consecutive differences shifted by +100;
// struct SA sorts the suffixes of this array.
int str[N];
struct SA{
	int n, r; //n字符串长度 r基数
	int sa[N]; //排名为i的后缀位置+1 i取值1~n
	int cnt[N]; //基数排序辅助数组
	int rak[N]; //第i个后缀的排名
	int tmp[N]; //rak的辅助数组
	int heig[N]; //后缀排序相邻LCP
	void radix_sort(int *rk, int *tp)
	{
		memset(cnt, 0, sizeof cnt);   // tp[i]用于记录下标, 即是第二排序依据
		for (int i = 1; i <= n; i++)
			cnt[rk[tp[i]]]++;
		for (int i = 1; i <= r; i++)  // 与基数排序一样
			cnt[i] += cnt[i - 1];
		for (int i = n; i >= 1; i--) // 赋值给sa
			sa[cnt[rk[tp[i]]]--] = tp[i];
	}
	void suffix()
	{
		int *rk = rak, *tp = tmp;
		for (int i = 1; i <= n; i++)
			rk[i] = str[i], tp[i] = i; // 最开始, 排名不清楚， 默认字符大小
		r = 200; // 0 ~ 127
		radix_sort(rk, tp);
		for (int l = 1, p = 1, i; p < n; l <<= 1, r = p) // l 是上一次排序的长度, r为最高的排名数
		{
			for (p = 0, i = n - l + 1; i <= n; i++) //长度不满l的后缀, 后面为空, 显然优先级最高
				tp[++p] = i;
			for (i = 1; i <= n; i++) // sa[i] 上一次的排名情况, sa[i] - l为当前这个串的前一半的位置, 直接赋值给tp
				if (sa[i] > l)    // 因为是从1 ~ n, 排名按照从小到大
					p++, tp[p] = sa[i] - l;
			radix_sort(rk, tp);
			swap(rk, tp); // rk, tp交换接下来更新rk, tp存的是未更新的rk的信息
			rk[sa[1]] = p = 1;
			for (i = 2; i <= n; i++)// 若一个不相等即排名++, p也即是不同排名的个数
			{
				if (tp[sa[i]] != tp[sa[i - 1]] || tp[sa[i] + l] != tp[sa[i - 1] + l])
					p++;
				rk[sa[i]] = p;
			}
		}
	}
	void get_height()//heig[i]为str[sa[i-1]]与str[sa[i]]的最长公共前缀
	{
		for (int i = 1; i <= n; i++)
			rak[sa[i]] = i;
		int k = 0;
		for (int i = 1; i <= n; i++)
		{
			if (k)
				k--;      // k是比较的第几个字符
			int j = sa[rak[i] - 1]; // 前一个排名的位置
			while (str[i + k] == str[j + k])
				k++;
			heig[rak[i]] = k;
		}
	}
}sa;
bool check(int mid)
{
	int MX = sa.sa[1], MI = sa.sa[i], cnt = 0;
	for (int i = 2; i <= sa.n; i++)
	{
		if (sa.heig[i] < mid)  // 这里意味着新段的开始
			MX = MI = sa.sa[i];
		else   // 在这一段height内, 其相同前缀均大于mid, 意味着我们任意取这段内的两个后缀
		{     // 其长度均大于mid， 我们取最大的差值判断即可, 差值大于了mid, 意味着中间
			if (MX < sa.sa[i])   // 出现一段长度大于等于k的字符串, 与当前的这段即重复
				MX = sa.sa[i];
			if (MI > sa.sa[i])
				MI = sa.sa[i];
			if (MX - MI >= mid)
				return true;
		}
	}
	return false;
}
void solve()
{
	int L = 1, R = sa.n, ans = 0;
	while (L <= R)
	{
		int mid = L + R >> 1;
		if (check(mid))
			L = mid + 1, ans = mid;
		else
			R = mid - 1;
	}
	printf("%d\n", ans + 1 >= 5 ? ans + 1 : 0);
}
int main()
{
#ifdef LOCAL
	freopen("D:/input.txt", "r", stdin);
#endif
	int n;
	while (cin >> n, n)
	{
		for (int i = 1; i <= n; i++)
			scanf("%d", &str[i]);
		for (int i = 1; i <= n - 1; i++)
		{
			str[i] = str[i + 1] - str[i];
			str[i] += 100;
		}
		sa.n = --n;
		sa.suffix();
		sa.get_height();
		solve();
	}
	return TIME;
}

POJ 1743 Musical Theme(后缀数组)

相关阅读

相关文章

相关问答

相关文档