题目:
从N个数中选取最大的前10个, 有序输出.
N最大可能达到1000亿
每个数范围是0 – 2147483647
堆排序版测试结果:
总计[1000000]个输入
总计比较[4232804]次
总计写内存[3849024]次
总计耗时[0.046478s]
来源:http://blog.csdn.net/lgg201/article/details/8449490
/* * author: goosman.lei * mail: lgg860911@yahoo.com.cn * blog: http://blog.csdn.net/lgg201 */ #include <stdio.h> #include <time.h> #include <stdlib.h> #include <unistd.h> #include <strings.h> #define BUFF_LEN (4096) #define PARENT(i) ((i) / 2 - 1) #define LEFT(i) ((i + 1) * 2 - 1) #define RIGHT(i) ((i + 1) * 2) /* #define DEBUG */ #define INFO #ifdef INFO int s_0, s_1, s_2; struct timeval begin, end; #endif typedef struct queue_s queue_t; struct queue_s { int data; queue_t *next; }; static void generate_test_data(long n) { long i; int r; int l; l = sizeof(int); srand(time(NULL)); for ( i = 0; i < n; i ++ ) { r = rand(); fprintf(stdout, "%d\n", r); write(STDERR_FILENO, &r, l); } } static int read_input(int fd, void *buff, int buff_len) { int i, ret; for ( i = 0; i < buff_len; ) { ret = read(fd, buff, BUFF_LEN); if ( -1 == ret ) { perror("read error\n"); exit(0); } else if ( 0 == ret ) { break; } else { buff += ret; i += ret; } } return i; } static void dump_link(queue_t *q, int n) { for ( ; q != NULL; q = q->next ) fprintf(n ? stderr : stdout, "%d\n", q->data); if ( n ) printf("\n"); } void max_heapify(int *sbuff, int j) { int i; #ifdef INFO s_0 += 3; s_1 ++; #endif if ( sbuff[j] < sbuff[LEFT(j)] ) i = LEFT(j); else i = j; if ( sbuff[i] < sbuff[RIGHT(j)] ) { i = RIGHT(j); #ifdef INFO s_1 ++; #endif } if ( i != j ) { sbuff[i] ^= sbuff[j]; sbuff[j] ^= sbuff[i]; sbuff[i] ^= sbuff[j]; max_heapify(sbuff, i); #ifdef INFO s_1 += 3; #endif } } int main(int argc, char *argv[]) { int *sbuff, *rbuff, *rbuff_t; int i, j, n, rbuff_n; if ( argc < 2 ) { printf("usage: \n\t1. 生成测试数据: %s <number> /* 标准错误以二进制方式输出测试数据, 标准输出以文本方式输出测试数据用于脚本校验 */\n\t2. 执行Top 10查找: %s <exec> /* 标准输出输出前10个最大数据(倒序), 开启INFO时在标准错误输出统计信息, 开启DEBUG时在标准错误输出调试信息\n", argv[0], argv[0]); return (0); } if ( strcmp(argv[1], "exec") != 0 ) { /* 不考虑数字输入的容错了 */ generate_test_data(atoi(argv[1])); return 0; } sbuff = malloc(1024 * 1024 * 4 - 4); rbuff = malloc(256 * 1024 * 10 * 4); /* 足够10000亿数据 */ rbuff_t = rbuff; rbuff_n = 0; #ifdef INFO s_0 = 0; s_1 = 0; s_2 = 0; gettimeofday(&begin, NULL); #endif while ( 0 != (n = read_input(STDIN_FILENO, sbuff, 1024 * 1024 * 4 - 4)) ) { #ifdef INFO s_2 += n / 4; #endif for ( j = (n / 4) / 2; j >= 0; j -- ) { #ifdef INFO s_0 ++; #endif max_heapify(sbuff, j); } for ( i = 0; i < 10; i ++ ) { #ifdef INFO s_0 ++; s_1 += 4; #endif rbuff[rbuff_n] = sbuff[0]; sbuff[0] = sbuff[(n / 4) - 1 - i]; sbuff[(n / 4) - 1 - i] = -1; max_heapify(sbuff, 0); rbuff_n ++; } } for ( j = rbuff_n / 2; j >= 0; j -- ) { #ifdef INFO s_0 ++; #endif max_heapify(rbuff, j); } for ( i = 0; i < 10; i ++ ) { #ifdef INFO s_0 ++; s_1 += 4; #endif printf("%d\n", rbuff[0]); rbuff[0] = rbuff[rbuff_n - i]; rbuff[rbuff_n - i] = -1; max_heapify(rbuff, 0); } #ifdef INFO gettimeofday(&end, NULL); #endif #ifdef INFO fprintf(stderr, "总计[%d]个输入\n总计比较[%d]次\n总计写内存[%d]次\n总计耗时[%0.6fs]\n", s_2, s_0, s_1, (end.tv_sec * 1000000 + end.tv_usec - begin.tv_sec * 1000000 - begin.tv_usec) / 1000000.0); #endif return 0; }