去重排序

去重排序

van 知其变,守其恒,为天下式.

字符串去重并排序

给定一个只包含小写字母的字符串,目标是编写一个函数,将字符串中的字符去重并按字典序排序。

初始方法

使用布尔数组作为哈希表来记录字符是否出现过,然后使用qsort函数对结果进行排序。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

void remove_duplicate_and_sort(char* str) {
bool hash[26] = { 0 };
int index = 0;

for (int i = 0; str[i]; i++) {
if (!hash[str[i] - 'a']) {
hash[str[i] - 'a'] = true;
str[index++] = str[i];
}
}

str[index] = '\0';
qsort(str, index, sizeof(char), (int(*)(const void*, const void*)) strcmp);
}

int main(void) {
char str[100];
printf("请输入一个字符串:\n");
scanf("%s", str);
remove_duplicate_and_sort(str);
printf("处理后的字符串是:%s\n", str);
return 0;
}

优化方法:使用位图

通过引入位图(BitMap)的概念,可以进一步提高算法的效率。位图使用一个位来标记某个元素对应的状态,相比布尔数组更节省空间。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#include <stdio.h>
#include "BitMap.h"

void remove_duplicate_and_sort(char* str) {
BitMap* bm = bitmap_create(26);
char* p = str;
while (*p) {
bitmap_set(bm, *p - 'a');
p++;
}
p = str;
for (int i = 0; i < bm->bits; i++) {
if (bitmap_isset(bm, i)) {
*p++ = 'a' + i;
}
}
*p = '\0';
bitmap_destroy(bm);
}

int main(void) {
char str[] = "bcabc";
remove_duplicate_and_sort(str);
puts(str); // 输出处理后的字符串
return 0;
}

这种方法相比使用布尔数组,可以显著减少内存的使用量,尤其是在处理大量数据时更为显著。通过位操作实现的位图,其性能也比使用数组进行标记要高效。


补充

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
//BitMap.h
#include <stdbool.h>
#include <stdint.h>

typedef uint32_t Word; // uint32_t: 1.大小确定 (32bits); 2.无符号整数

typedef struct {
uint32_t* array;
size_t bits;
} BitMap;

BitMap* bitmap_create(size_t bits);
void bitmap_destroy(BitMap* bm);

void bitmap_set(BitMap* bm, size_t n);
void bitmap_unset(BitMap* bm, size_t n);
bool bitmap_isset(BitMap* bm, size_t n);
void bitmap_clear(BitMap* bm);

void grow_capacity(BitMap* bm, size_t bits)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
// BitMap.c
#include "BitMap.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define BITS_PER_WORD 32
#define BITMAP_SHIFT 5 // 2^5 = 32
#define BITMAP_MASK 0x1F
// 存储bits位,需要多少个word
#define BITMAP_SIZE(bits) ((bits + BITS_PER_WORD - 1) >> BITMAP_SHIFT)

BitMap* bitmap_create(size_t bits) {
BitMap* bm = malloc(sizeof(BitMap));
bm->array = calloc(BITMAP_SIZE(bits), BITS_PER_WORD >> 3);
bm->bits = bits;
return bm;
}

void bitmap_set(BitMap* bm, size_t n) {
if (n >= bm->bits) {
if (BITMAP_SIZE(n + 1) > BITMAP_SIZE(bm->bits)) {
// 扩容
grow_capacity(bm, n + 1);
}
bm->bits = n + 1;
}
// 设置第n位
// 如何表示第n位 (word, offset)
size_t word = n >> BITMAP_SHIFT;
size_t offset = n & BITMAP_MASK; // n%32
bm->array[word] |= (0x1 << offset);
}
void bitmap_unset(BitMap* bm, size_t n) {
if (n > bm->bits)
{
return;
}
size_t word = n >> BITMAP_SHIFT;
size_t offset = n & BITMAP_MASK; // n%32
bm->array[word] &= ~(0x1 << offset);
}

bool bitmap_isset(BitMap* bm, size_t n) {
if (n > bm->bits)
{
return;
}
size_t word = n >> BITMAP_SHIFT;
size_t offset = n & BITMAP_MASK; // n%32
bm->array[word] &= (0x1 << offset);
}
void bitmap_clear(BitMap* bm) {
memset(bm->array, 0, BITMAP_SIZE(bm->bits) * sizeof(Word));
}

void bitmap_destroy(BitMap* bm) {
free(bm->array);
free(bm);
}
void grow_capacity(BitMap* bm, size_t bits)
{
Word* new_array = realloc(bm->array, BITMAP_SIZE(bits) * sizeof(Word));//字*字节/字 == 字节数
if (!new_array)
{
printf("error realloc in grow_capacity\n");
exit(1);
}
bm->array = new_array;
int bytes = (BITMAP_SIZE(bits) - BITMAP_SIZE(bm->bits)) * sizeof(Word);
memset(bm->array+BITMAP_SIZE(bm->bits), 0, bytes);
}
  • Title: 去重排序
  • Author: van
  • Created at : 2024-05-06 23:59:54
  • Updated at : 2024-09-02 00:01:49
  • Link: https://xblog.aptzone.cc/2024/05/06/去重排序/
  • License: All Rights Reserved © van
Comments