求最长不重叠子串

2018/05/16 23:21
阅读数 6
A musical melody is represented as a sequence of N (1<=N<=20000)notes that are integers in the range 1..88, each representing a key on the piano. It is unfortunate but true that this representation of melodies ignores the notion of musical timing; but, this programming task is about notes and not timings.
Many composers structure their music around a repeating &qout;theme&qout;, which, being a subsequence of an entire melody, is a sequence of integers in our representation. A subsequence of a melody is a theme if it:
  • is at least five notes long
  • appears (potentially transposed -- see below) again somewhere else in the piece of music
  • is disjoint from (i.e., non-overlapping with) at least one of its other appearance(s)

Transposed means that a constant positive or negative value is added to every note value in the theme subsequence.
Given a melody, compute the length (number of notes) of the longest theme.
One second time limit for this problem's solutions!
Input
The input contains several test cases. The first line of each test case contains the integer N. The following n integers represent the sequence of notes.
The last test case is followed by one zero.
Output
For each test case, the output file should contain a single line with a single integer that represents the length of the longest theme. If there are no themes, output 0.
Sample Input
30
25 27 30 34 39 45 52 60 69 79 69 60 52 45 39 34 30 26 22 18
82 78 74 70 66 67 64 60 65 80
0
Sample Output
5
Hint
Use scanf instead of cin to reduce the read time.
 

题意:给定一个钢琴的音普序列[值的范围是(1~88)],现在要求找到一个子序列满足

1,长度至少为5

2,序列可以转调,即存在两个子序列,满足一个子序列加/减一个数后可以得到另一个序列

3,两个序列不能有相交的部分。

题意简单来说就是找最长不重叠的重复子串

思路分析:

参考 09年的国家集训队论文,二分答案,把题目变成判定性问题:判断是否存在两个长度为k 的子串是相同的,且不重叠。解决这个问题的关键还是利用height 数组。把排序后的后缀分成若干组,其中每组的后缀之间的height 值都不小于k。容易得出,有希望成为最长公共前缀不小于k 的两个后缀一定在同一组。然后对于每组后缀,只须判断每个后缀的sa 值的最大值和最小值之差是否不小于k。如果有一组满足,则说明存在,否则不存在。整个做法的时间复杂度为O(nlogn)。

对于第二点要求的转换,即转调条件,一个子序列加/减一个数后可以得到另一个序列实际是两个序列的变化程度是一样的,比如序列1 2 3 4 5 6 7 8 9 10 那么对于这个序列的可以得到长度为5的两个不相交的"转调后的重复子序列", 序列一:1 2 3 4 5(变化程度 1 1 1 1) 序列二:6 7 8 9 10(变化程度1 1 1 1) ,变化程度为相邻两个数字的差值,得到变化序列后我们就可以用上面的思路来求解了。 因为是通过变化序列来求解,所以答案序列是变化序列+1。

代码示例:

/*
给定一个钢琴的音普序列[值的范围是(1~88)],现在要求找到一个子序列满足
1,长度至少为5
2,序列可以转调,即存在两个子序列,满足一个子序列加/减一个数后可以得到另一个序列
3,两个序列不能有相交的部分。
题意简单来说就是找最长不重叠的重复子串 
*/

#define ll long long
const int maxn = 2e4+5;

/*
-待排序数组长度为 n,放在 0~n-1,在最后加一个'$',让其比所有的字符串中的字符都小,
这样既可以满足字典序的要求,也不越界
*build_sa( ,n+1, );//注意是n+1;
*getheight(,n);
*例如:
*n   = 8;
*num[]   = { 1, 1, 2, 1, 1, 1, 1, 2, $ };注意num最后一位为0,其他大于0
*rank[]  = { 4, 6, 8, 1, 2, 3, 5, 7, 0 };rank[0~n-1]为有效值,rank[n]必定为0无效值
*sa[]    = { 8, 3, 4, 5, 0, 6, 1, 7, 2 };sa[1~n]为有效值,sa[0]必定为n是无效值
*height[]= { 0, 0, 3, 2, 3, 1, 2, 0, 1 };height[2~n]为有效值
*/

int n; 
int a[maxn], s[maxn];
int sa[maxn], rank[maxn], height[maxn];
int t1[maxn], t2[maxn], c[maxn];

bool cmp(int *r, int a, int b, int l){
    return r[a] == r[b] && r[a+l] == r[b+l]; // !!!
}

void build_sa(int n, int m){ //m代表估计数字,是ASCll的最大值
    int *x = t1, *y = t2;
    
    for(int i = 0; i < m; i++) c[i] = 0;
    for(int i = 0; i < n; i++) c[x[i]=s[i]]++;
    for(int i = 1; i < m; i++) c[i] += c[i-1];
    for(int i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
    
    for(int j = 1; j <= n; j <<= 1){  // !!!
        int p = 0;
        for(int i = n-j; i < n; i++) y[p++] = i;
        for(int i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i]-j;
        
        for(int i = 0; i < m; i++) c[i] = 0;
        for(int i = 0; i < n; i++) c[x[y[i]]]++;
        for(int i = 1; i < m; i++) c[i] += c[i-1];
        for(int i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
        
        swap(x, y);
        p = 1; x[sa[0]] = 0;  // !!!!
        
        for(int i = 1; i < n; i++) 
            x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
       
        if (p >= n) break;
        m = p; // 下次基数排序的最大值  !!!
    }
}

void getheight(){
    int k = 0;
    for(int i = 0; i <= n; i++) rank[sa[i]] = i;
    for(int i = 0; i < n; i++){
        if (k) k--;
        int j = sa[rank[i]-1];
        while(s[i+k] == s[j+k]) k++;
        height[rank[i]] = k;
    }
}


bool check(int k){
    int Max = sa[1], Min = sa[1];
    
    for(int i = 2; i <= n; i++){
        if (height[i] < k) Max = Min = sa[i];
        else {
            if (sa[i] < Min) Min = sa[i];
            if (sa[i] > Max) Max = sa[i];
            if (Max - Min >= k) return  true;
        }
    }
    return false;
}
    
int main() {
    //freopen("in.txt", "r", stdin);
    //freopen("out.txt", "w", stdout);
    
    while(~scanf("%d", &n) && n){
        for(int i = 0; i < n; i++){
            scanf("%d", &a[i]); 
        }
        for(int i = 1; i < n; i++){
            s[i-1] = a[i]-a[i-1]+90;
        }
        n--; // 减少一个单位长度
        s[n] = 0;
        build_sa(n+1, 200); //此时的n是要算上最末尾的特殊字符
        getheight(); //此时的n是不算特殊字符的长度
        
        int l = 1, r = n;
        int ans = -1;
        while(l <= r){
            int mid = (l+r)>>1;
            if (check(mid)){
                ans = mid;
                l = mid+1;
            }
            else r = mid-1;
        }
        if (ans < 4) printf("0\n");
        else printf("%d\n", ans+1); // 这里再加 1 是因为,此串是转换成相邻两差的形式
                                    // 因此要还原最原始的串,则需要长度 +1
    }    
    return 0;
}

 

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部