C 语言词频统计

词频统计是指统计一个文本中每个单词出现的次数。在 C 语言中，我们可以使用哈希表来实现词频统计。哈希表是一个数据结构，它将键值对存储在数组中，并使用哈希函数将键映射到数组索引。哈希函数将键转换成一个数字，该数字用于确定键值对在数组中的位置。

要使用哈希表进行词频统计，我们可以将单词作为键，将单词出现的次数作为值。当我们从文本中读取单词时，我们首先检查哈希表中是否已经存在该单词。如果单词不存在，我们就创建一个新的键值对，并将单词的出现次数初始化为 1。如果单词已经存在，我们就将单词的出现次数增加 1。

以下是 C 语言中使用哈希表进行词频统计的一个示例代码：

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Hash table size: the bucket count that main() passes to create_hashtable().
#define HASHTABLE_SIZE 1000
// Hash table node: one key/value entry in a singly linked bucket chain.
typedef struct node node_t;

struct node {
    char *key;      // word text; released with free() when the table is destroyed
    int value;      // occurrence count stored for this key
    node_t *next;   // next entry in the same bucket, or NULL at the end of the chain
};
// Hash table: an array of bucket heads, each the start of a linked list of nodes.
typedef struct hashtable {
    // Array of `size` bucket-head pointers; a NULL slot is an empty bucket.
    // This must be a pointer-to-pointer (not an embedded node) because the
    // table is allocated at run time and indexed as table[i].
    struct node **table;
    int size;  // number of buckets in `table`
} hashtable_t;
// Create an empty hash table with `size` buckets.
//
// Returns a newly allocated table whose buckets are all empty, or NULL when
// `size` is not positive or memory cannot be allocated. The caller owns the
// result and releases it with destroy_hashtable().
hashtable_t *create_hashtable(int size) {
    // A table with no buckets would make `hash % size` divide by zero, and a
    // negative size would turn into a huge allocation request.
    if (size <= 0) {
        return NULL;
    }

    hashtable_t *hashtable = malloc(sizeof *hashtable);
    if (hashtable == NULL) {
        return NULL;
    }

    // calloc checks the count * element-size multiplication for overflow and
    // zero-fills the bucket array, so every chain starts out empty.
    hashtable->table = calloc((size_t)size, sizeof *hashtable->table);
    if (hashtable->table == NULL) {
        free(hashtable);
        return NULL;
    }

    hashtable->size = size;
    return hashtable;
}
// Destroy a hash table: free every node, every node's key, the bucket array
// and the table itself.
//
// Passing NULL is allowed and does nothing (mirrors free()), so callers can
// clean up unconditionally even when create_hashtable() failed.
void destroy_hashtable(hashtable_t *hashtable) {
    if (hashtable == NULL) {
        return;
    }

    for (int i = 0; i < hashtable->size; i++) {
        node_t *node = hashtable->table[i];
        while (node != NULL) {
            // Save the successor before the node is released.
            node_t *next = node->next;
            free(node->key);
            free(node);
            node = next;
        }
    }

    free(hashtable->table);
    free(hashtable);
}
// Hash function: multiply-by-31 polynomial hash over the bytes of `key`.
//
// `key` must be a NUL-terminated string; it is only read. Bytes are taken as
// unsigned char so that the result does not depend on whether plain `char`
// is signed on the target platform (this matters for non-ASCII text).
// Arithmetic wraps modulo UINT_MAX + 1. The caller reduces the result to a
// bucket index.
unsigned int hash_function(const char *key) {
    unsigned int hash = 0;
    for (const unsigned char *p = (const unsigned char *)key; *p != '\0'; p++) {
        hash = hash * 31 + *p;
    }
    return hash;
}
// Insert a key/value pair at the head of the key's bucket chain.
//
// The table stores its own copy of `key`, so the caller's buffer may be
// reused afterwards. No check for an existing entry is made; callers that
// want unique keys call find() first. If `hashtable` or `key` is NULL, or
// memory runs out, the table is left unchanged.
void insert(hashtable_t *hashtable, char *key, int value) {
    if (hashtable == NULL || key == NULL) {
        return;
    }

    node_t *node = malloc(sizeof *node);
    if (node == NULL) {
        return;
    }

    // Copy the key with malloc + memcpy rather than strdup: strdup is not
    // declared under strict ISO C11, and its result must be checked anyway
    // so that no node with a NULL key ever reaches strcmp() in find().
    size_t key_size = strlen(key) + 1;
    node->key = malloc(key_size);
    if (node->key == NULL) {
        free(node);
        return;
    }
    memcpy(node->key, key, key_size);

    node->value = value;

    unsigned int hash = hash_function(key) % hashtable->size;
    node->next = hashtable->table[hash];
    hashtable->table[hash] = node;
}
// Look up `key` in the table.
//
// Walks the chain of the bucket selected by the key's hash and returns the
// first node whose key compares equal, or NULL when there is no such node.
node_t *find(hashtable_t *hashtable, char *key) {
    unsigned int bucket = hash_function(key) % hashtable->size;

    for (node_t *cur = hashtable->table[bucket]; cur != NULL; cur = cur->next) {
        if (strcmp(cur->key, key) != 0) {
            continue;
        }
        return cur;
    }

    return NULL;
}
int main() {
// 创建哈希表
hashtable_t *hashtable = create_hashtable(HASHTABLE_SIZE);
// 从文本中读取单词
FILE *fp = fopen("", "r");
if (fp == NULL) {
perror("fopen");
return EXIT_FAILURE;
}
char word[100];
while (fscanf(fp, "%s", word) != EOF) {
// 将单词插入哈希表
node_t *node = find(hashtable, word);
if (node == NULL) {
insert(hashtable, word, 1);
} else {
node->value++;
}
}
fclose(fp);
// 打印词频
for (int i = 0; i < hashtable->size; i++) {
node_t *node = hashtable->table[i];
while (node != NULL) {
printf("%s: %d", node->key, node->value);
node = node->next;
}
}
// 销毁哈希表
destroy_hashtable(hashtable);
return EXIT_SUCCESS;
}
```