拼接字符串，得到字典序最小的结果

1、题目

给定一个由字符串组成的数组 strs，必须把所有的字符串拼接起来，返回所有可能的拼接结果中，字典序最小的结果。

2、思路

贪心策略：将数组中的字符串按字典序进行排序，然后按序拼接起来。

这种策略不对！！，因为存在反例：["b", "ba"]，字典序 b < ba，按照贪心策略得到的拼接字符串应该就是 bba，但是实际上 bab 这个拼接的结果是比 bba 小的。

//失败的贪心策略
if (str1 < str2) //str1的字典序 < str2的字典序
    str1在前;
else 
    str2在前;
1
2
3
4
5

真正有效的贪心策略：使用比较器判断两个字符串拼接后的字典序

//如果 str1 拼接上 str2 之后的字典序 < str2 拼接上 str1 之后的字典序
if (str1.str2 < str2.str1) 
    str1在前;
else
    str2在前;
1
2
3
4
5

为什么这种贪心策略是有效的？ 这就需要证明。

首先需要证明该排序策略具有传递性，目的是为了证明在该种排序策略下得到的字符串是唯一的。

传递性也就是要证明：根据（1）a.b <= b.a；（2）b.c <= c.b，能否推导出 a.c <= c.a？

例如字符串 “123” 和 “456”，拼接后为 “123456”，如果是 K 进制，那么这个拼接过程用数学符号代替就是 “123” $\times$ K³ + “456”。

所有的字符串都可以看做数字，则一定是非负整数。

进一步抽象化，假设所有字符串都是 K 进制的，那么 a.b = a $\times$ K^b长度 + b，再进一步替代，m(x) = K^x长度。

那么第1）2)条件就可以进行改写：

a.b <= b.a 可以改写为： a * m(b) + b <= b * m(a) + a ①

b.c <= c.b 可以改写为：b * m(c) + c <= c * m(b) + b ②

①式左右两边同时减b，再同时乘c：a * m(b) *c <= c * b * m(a) + a * c - b * c

②式左右两边同时减b，再同时乘a：a * b * m(c) + a * c - b * a <= c * m(b) * a

可得a * b * m(c) + a * c - b * a <= c * b * m(a) + a * c - b * c

同时减(a * c )，得到 a * b * m(c) - b*a <= c * b * m(a) - b * c

同时含因子 b，两边同时除以b：a * m(c) - a <= c * m(a) - c

挪动a和c：a * m(c) +c <= c * m(a) + a 变成了a.c <= c.a，证明结束。
证明得到的字符串一定是字典序最小的。

先证明任意2个字符串如果交换顺序，字典序只会变大。然后任意3个，任意4个，任意5个，…

现在需要证明 [… a $m_1$ $m_2$ b …] <= [… b $m_1$ $m_2$ a …]

首先，如果仅仅是交换 a 和 $m_1$ ，那么根据传递性，a. $m_1$ <= $m 1$ .a，所以[… a $m_1$ $m_2$ b …] <= [… $m_1$ a $m_2$ b …]

同理，又因为 a. $m_2$ <= $m_2$ .a，所以 [… $m_1$ a $m_2$ b …] <= [… $m_1$ $m_2$ a b …]

同理，交换a和b，[… $m_1$ $m_2$ a b …] <= [… $m_1$ $m_2$ b a …]

再将b 和 $m_2$ 交换，[… $m_1$ $m_2$ b a …] <= [… $m_1$ b $m_2$ a …]

最后，交换b 和 $m_1$ ，所以得到 [… $m_1$ b $m_2$ a …] <= […b $m_1$ $m_2$ a …]

总结来说就是： [… a $m_1$ $m_2$ b …] <= [… b $m_1$ $m_2$ a …]。

所以交换a和b的顺序，字典序只会增大，不会变小。再去证明交换任意3个、4个、…等等，用数学归纳法推导出来。

但是，实际上贪心策略不需要证明也能知道其正确性。方法论：用暴力解+ 对数器 的方法验证贪心策略的正确性。

3、代码实现

C++ 版

/*************************************************************************
	> File Name: 034.拼接字符串字典序最小.cpp
	> Author: Maureen 
	> Mail: Maureen@qq.com 
	> Created Time: 四  6/30 21:19:41 2022
 ************************************************************************/

#include <iostream>
#include <vector>
#include <set>
#include <cstring>
#include <ctime>
using namespace std;

vector<string> removeIndexString(vector<string> &strs, int index) {
    int n = strs.size();
    vector<string> ans(n - 1);

    int ansIndex = 0;
    for (int i = 0; i < n; i++) {
        if (i != index) {
            ans[ansIndex++] = strs[i];
        }
    }
    return ans;
}

set<string> process(vector<string> &strs) {
    set<string> ans;
    if (strs.size() == 0) {
        ans.insert("");
        return ans;
    }

    for (int i = 0; i < strs.size(); i++) {
        string first = strs[i];
        vector<string> nexts = removeIndexString(strs, i);
        set<string> next = process(nexts);
        for (string cur : next) {
            ans.insert(first + cur);
        }
    }
    return ans;
}

string lowestString1(vector<string> &strs) {
    if (strs.size() == 0) return "";
    set<string> ans = process(strs);
    auto it = ans.begin();
    return ans.size() == 0 ? "" : *it;
}



//方法二：
bool comp(const string &a, const string &b) {
    return a + b < b + a;
}

string lowestString2(vector<string> &strs) {
    if (strs.size() == 0) return "";
    sort(strs.begin(), strs.end(), comp);
    string res = "";
    for (int i = 0; i < strs.size(); i++) {
        res += strs[i];
    }

    return res;
}


//for test 
string generateRandomString(int len) {
    char *ans = new char[rand() % len + 1];
    for (int i = 0; i < strlen(ans); i++) {
        int value = rand() % 5;
        ans[i] = (rand() % 100 / (double)101) <= 0.5 ? (char)(65 + value) : (char)(97 + value);
    }
    return ans;
}


vector<string> generateRandomStringArray(int alen, int slen) {
    vector<string> ans(rand() % alen + 1);
    for (int i = 0; i < ans.size(); i++) {
        ans[i] = generateRandomString(slen);
    }
    return ans;
}


vector<string> copyStringArray(vector<string> &arr) {
    vector<string> ans(arr.size());
    for (int i = 0; i < ans.size(); i++) {
        ans[i] = arr[i];
    }

    return ans;
}


int main() {
    srand(time(0));
        
    int arrLen = 6;
    int strLen = 5;

    int testTime = 10001;

    cout << "test begin" << endl;

    for (int i = 0; i < testTime; i++) {
        vector<string> arr1 = generateRandomStringArray(arrLen, strLen);
        vector<string> arr2 = copyStringArray(arr1);

        if (lowestString1(arr1) != lowestString2(arr2)) {
            for (string str : arr1) {
                cout << str << " , ";
            }
            cout << endl;
            cout << "Oops!" << endl;
            break;
        }
        if (i && i % 100 == 0) cout << i << " cases passed!" << endl;
    }

    cout << "finish!" << endl;

    return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

Java 版

package class13;

import java.util.Arrays;
import java.util.Comparator;
import java.util.TreeSet;

public class Code05_LowestLexicography {
	
	//方法一：暴力解
	public static String lowestString1(String[] strs) {
		if (strs == null || strs.length == 0) {
			return "";
		}
		TreeSet<String> ans = process(strs);
		return ans.size() == 0 ? "" : ans.first();
	}

	// strs中所有字符串全排列，返回所有可能的结果
	public static TreeSet<String> process(String[] strs) {
		TreeSet<String> ans = new TreeSet<>();
		if (strs.length == 0) {
			ans.add("");
			return ans;
		}
		for (int i = 0; i < strs.length; i++) {
			String first = strs[i];
			String[] nexts = removeIndexString(strs, i);
			TreeSet<String> next = process(nexts);
			for (String cur : next) {
				ans.add(first + cur);
			}
		}
		return ans;
	}

	// {"abc", "cks", "bct"}
	// 0 1 2
	// removeIndexString(arr , 1) -> {"abc", "bct"}
	public static String[] removeIndexString(String[] arr, int index) {
		int N = arr.length;
		String[] ans = new String[N - 1];
		int ansIndex = 0;
		for (int i = 0; i < N; i++) {
			if (i != index) {
				ans[ansIndex++] = arr[i];
			}
		}
		return ans;
	}

	//方法二：使用对数器
	public static class MyComparator implements Comparator<String> {
		@Override
		public int compare(String a, String b) {
			return (a + b).compareTo(b + a);
		}
	}

	public static String lowestString2(String[] strs) {
		if (strs == null || strs.length == 0) {
			return "";
		}
		Arrays.sort(strs, new MyComparator());
		String res = "";
		for (int i = 0; i < strs.length; i++) {
			res += strs[i];
		}
		return res;
	}

	// for test
	public static String generateRandomString(int strLen) {
		char[] ans = new char[(int) (Math.random() * strLen) + 1];
		for (int i = 0; i < ans.length; i++) {
			int value = (int) (Math.random() * 5);
			ans[i] = (Math.random() <= 0.5) ? (char) (65 + value) : (char) (97 + value);
		}
		return String.valueOf(ans);
	}

	// for test
	public static String[] generateRandomStringArray(int arrLen, int strLen) {
		String[] ans = new String[(int) (Math.random() * arrLen) + 1];
		for (int i = 0; i < ans.length; i++) {
			ans[i] = generateRandomString(strLen);
		}
		return ans;
	}

	// for test
	public static String[] copyStringArray(String[] arr) {
		String[] ans = new String[arr.length];
		for (int i = 0; i < ans.length; i++) {
			ans[i] = String.valueOf(arr[i]);
		}
		return ans;
	}

	public static void main(String[] args) {
		int arrLen = 6;
		int strLen = 5;
		int testTimes = 10000;
		System.out.println("test begin");
		for (int i = 0; i < testTimes; i++) {
			String[] arr1 = generateRandomStringArray(arrLen, strLen);
			String[] arr2 = copyStringArray(arr1);
			if (!lowestString1(arr1).equals(lowestString2(arr2))) {
				for (String str : arr1) {
					System.out.print(str + ",");
				}
				System.out.println();
				System.out.println("Oops!");
			}
		}
		System.out.println("finish!");
	}
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

相关阅读:
【rk3568-linux】 rk3568x_linux-- 编译说明
Git:起步 - 关于版本控制
从零开始学习软件测试-第40天笔记
【算法|前缀和系列No.5】leetcode1314. 矩阵区域和
R语言ggplot2可视化：使用ggpubr包的ggline函数可视化折线图（点线图、line plot）
【5分钟背八股】03：说说你对aop的理解？
服务器硬件基础知识
【AcWing】3449. 数字根 (数论思维、模拟)
JS加密/解密之你是否真的明白xss
可托拉拽的WPF选项卡控件，强大好用！

原文地址：https://blog.csdn.net/u011386173/article/details/125548237