字符串去重并排序
给定一个只包含小写字母的字符串,目标是编写一个函数,将字符串中的字符去重并按字典序排序。
初始方法
使用布尔数组作为哈希表来记录字符是否出现过,然后使用qsort
函数对结果进行排序。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| #include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdbool.h>
void remove_duplicate_and_sort(char* str) { bool hash[26] = { 0 }; int index = 0;
for (int i = 0; str[i]; i++) { if (!hash[str[i] - 'a']) { hash[str[i] - 'a'] = true; str[index++] = str[i]; } }
str[index] = '\0'; qsort(str, index, sizeof(char), (int(*)(const void*, const void*)) strcmp); }
int main(void) { char str[100]; printf("请输入一个字符串:\n"); scanf("%s", str); remove_duplicate_and_sort(str); printf("处理后的字符串是:%s\n", str); return 0; }
|
优化方法:使用位图
通过引入位图(BitMap)的概念,可以进一步提高算法的效率。位图使用一个位来标记某个元素对应的状态,相比布尔数组更节省空间。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| #include <stdio.h> #include "BitMap.h"
void remove_duplicate_and_sort(char* str) { BitMap* bm = bitmap_create(26); char* p = str; while (*p) { bitmap_set(bm, *p - 'a'); p++; } p = str; for (int i = 0; i < bm->bits; i++) { if (bitmap_isset(bm, i)) { *p++ = 'a' + i; } } *p = '\0'; bitmap_destroy(bm); }
int main(void) { char str[] = "bcabc"; remove_duplicate_and_sort(str); puts(str); return 0; }
|
这种方法相比使用布尔数组,可以显著减少内存的使用量,尤其是在处理大量数据时更为显著。通过位操作实现的位图,其性能也比使用数组进行标记要高效。
补充
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| #include <stdbool.h> #include <stdint.h>
typedef uint32_t Word;
typedef struct { uint32_t* array; size_t bits; } BitMap;
BitMap* bitmap_create(size_t bits); void bitmap_destroy(BitMap* bm);
void bitmap_set(BitMap* bm, size_t n); void bitmap_unset(BitMap* bm, size_t n); bool bitmap_isset(BitMap* bm, size_t n); void bitmap_clear(BitMap* bm);
void grow_capacity(BitMap* bm, size_t bits)
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
| #include "BitMap.h" #include <stdlib.h> #include <stdio.h> #include <string.h>
#define BITS_PER_WORD 32 #define BITMAP_SHIFT 5 #define BITMAP_MASK 0x1F
#define BITMAP_SIZE(bits) ((bits + BITS_PER_WORD - 1) >> BITMAP_SHIFT)
BitMap* bitmap_create(size_t bits) { BitMap* bm = malloc(sizeof(BitMap)); bm->array = calloc(BITMAP_SIZE(bits), BITS_PER_WORD >> 3); bm->bits = bits; return bm; }
void bitmap_set(BitMap* bm, size_t n) { if (n >= bm->bits) { if (BITMAP_SIZE(n + 1) > BITMAP_SIZE(bm->bits)) { grow_capacity(bm, n + 1); } bm->bits = n + 1; } size_t word = n >> BITMAP_SHIFT; size_t offset = n & BITMAP_MASK; bm->array[word] |= (0x1 << offset); } void bitmap_unset(BitMap* bm, size_t n) { if (n > bm->bits) { return; } size_t word = n >> BITMAP_SHIFT; size_t offset = n & BITMAP_MASK; bm->array[word] &= ~(0x1 << offset); }
bool bitmap_isset(BitMap* bm, size_t n) { if (n > bm->bits) { return; } size_t word = n >> BITMAP_SHIFT; size_t offset = n & BITMAP_MASK; bm->array[word] &= (0x1 << offset); } void bitmap_clear(BitMap* bm) { memset(bm->array, 0, BITMAP_SIZE(bm->bits) * sizeof(Word)); }
void bitmap_destroy(BitMap* bm) { free(bm->array); free(bm); } void grow_capacity(BitMap* bm, size_t bits) { Word* new_array = realloc(bm->array, BITMAP_SIZE(bits) * sizeof(Word)); if (!new_array) { printf("error realloc in grow_capacity\n"); exit(1); } bm->array = new_array; int bytes = (BITMAP_SIZE(bits) - BITMAP_SIZE(bm->bits)) * sizeof(Word); memset(bm->array+BITMAP_SIZE(bm->bits), 0, bytes); }
|