文件多路递归C语言实现代码
#include <assert.h>
#include <time.h>
#include <stdio.h>
#include <memory.h>
#include <stdlib.h>
void swap_int(int* a,int* b)
{
int c;
c = *a;
*a = *b;
*b = c;
}
//插入排序
void InsertionSort(int A[],int N)
{
int j,p;
int tmp;
for(p = 1; p < N; p++)
{
tmp = A[p];
for(j = p;j > 0 && A[j - 1] >tmp;j--)
{
A[j] = A[j - 1];
}
A[j] = tmp;
}
}
//三数取中分割法
int Median3(int A[],int Left,int Right)
{
int Center = (Left + Right) / 2;
if (A[Left] > A[Center])
swap_int(&A[Left],&A[Center]);
if (A[Left] > A[Right])
swap_int(&A[Left],&A[Right]);
if (A[Center] > A[Right])
swap_int(&A[Center],&A[Right]);
swap_int(&A[Center],&A[Right - 1]);
return A[Right - 1];
}
//快速排序
void QuickSort(int A[],int Left,int Right)
{
int i,j;
int Pivot;
const int Cutoff = 3;
if (Left + Cutoff <= Right)
{
Pivot = Median3(A,Left,Right);
i = Left;
j = Right - 1;
while (1)
{
while(A[++i] < Pivot){;}
while(A[--j] > Pivot){;}
if (i < j)
swap_int(&A[i],&A[j]);
else
break;
}
swap_int(&A[i],&A[Right - 1]);
QuickSort(A,Left,i - 1);
QuickSort(A,i + 1,Right);
}
else
{
InsertionSort(A+Left,Right - Left + 1);
}
}
//const int KNUM = 40;
//分块数
const int NUMBER = 10000000;
//输入文件最大读取的整数的个数
//为了便于测试,我决定改成小文件小数据量进行测试。
const int KNUM = 4;
//分块数const int NUMBER = 100;
//输入文件最大读取的整数的个数
const char *in_file = "infile.txt";
const char *out_file = "outfile.txt";
//#define OUTPUT_OUT_FILE_DATA
//数据量大的时候,没必要把所有的数全部打印出来,所以可以把上面这句注释掉。
void gen_infile(int n)
{
int i;
FILE *f = fopen(in_file, "wt");
for(i = 0;i < n; i++)
fprintf(f,"%d ",rand());
fclose(f);
}
int read_data(FILE *f,int a[],int n)
{
int i = 0;
while ((i < n) && (fscanf(f,"%d",&a[i]) != EOF))
i++;
printf("read: %d integer/n",i);
return i;
}
void write_data(FILE *f,int a[],int n)
{
int i;for(i = 0; i< n;i++)
fprintf(f,"%d ",a[i]);
}
char* temp_filename(int index)
{
char *tempfile = (char*) malloc(64*sizeof(char));
assert(tempfile);
sprintf(tempfile, "temp%d.txt", index);
return tempfile;
}
//K路串行读取
void k_num_read(void)
{
char* filename;
int i,cnt,*array;
FILE* fin;
FILE* tmpfile;
//计算knum,每路应读取的整数个数
int n = NUMBER/KNUM;
if (n * KNUM < NUMBER)n++;
//建立存储分块读取的数据的数组
array = (int*)malloc(n * sizeof(int));assert(array);
//打开输入文件
fin = fopen(in_file,"rt");
i = 0;
//分块循环读取数据,并写入硬盘上的临时文件
while ( (cnt = read_data(fin,array,n))>0)
{
//对每次读取的数据,先进行快速排序,然后写入硬盘上的临时文件
QuickSort(array,0,cnt - 1);
filename = temp_filename(i++);
tmpfile = fopen(filename,"w");
free(filename);
write_data(tmpfile,array,cnt);
fclose(tmpfile);
}
assert(i == KNUM);
//没有生成K路文件时进行诊断
//关闭输入文件句柄和临时存储数组
fclose(fin);
free(array);
}
//k路合并(败者树)
void k_num_merge(void)
{
FILE *fout;
FILE **farray;
char *filename;
int *data;
char *hasNext;
int i,j,m,min;
#ifdef OUTPUT_OUT_FILE_DATAint id;
#endif
//打开输出文件
fout = fopen(out_file,"wt");
//打开各路临时分块文件
farray = (FILE**)malloc(KNUM*sizeof(FILE*));
assert(farray);
for(i = 0; i< KNUM;i++)
{
filename = temp_filename(i);
farray[i] = fopen(filename,"rt");
free(filename);
}
//建立KNUM个元素的data,hasNext数组,存储K路文件的临时数组和读取结束状态
data = (int*)malloc(KNUM*sizeof(int));
assert(data);
hasNext = (char*)malloc(sizeof(char)*KNUM);
assert(hasNext);
memset(data, 0, sizeof(int) * KNUM);
memset(hasNext, 1, sizeof(char) * KNUM);
//读K路文件先读取第一组数据,并对读取结束的各路文件设置不可再读状态
for(i = 0; i < KNUM; i++)
{
if(fscanf(farray[i], "%d", &data[i]) == EOF)
{
hasNext[i] = 0;
}
}
//读取各路文件,利用败者树从小到大输出到输出文件
#ifdef OUTPUT_OUT_FILE_DATAid = 0;
#endif
j = 0;F_LOOP:
if (j < KNUM)
//以下这段代码嵌套过深,日后应尽量避免此类问题。
{
while(1==1)
{
min = data[j];
m = j;
for(i = j+1; i < KNUM; i++)
{
if(hasNext[i] == 1 && min > data[i])
{
min = data[i];m = i;
}
}
if(fscanf(farray[m], "%d", &data[m]) == EOF)
{
hasNext[m] = 0;
}
fprintf(fout, "%d ", min);
#ifdef OUTPUT_OUT_FILE_DATAprintf("fout :%d %d/n",++id,min);
#endif
if (m == j && hasNext[m] == 0)
{
for (i = j+1; i < KNUM; i++)
{
if (hasNext[m] != hasNext[i])
{
m = i;
//第i个文件未读完,从第i个继续往下读
break;
}
}
if (m != j)
{
j = m;
goto F_LOOP;
}
break;
}
}
}
//关闭分配的数据和数组
free(hasNext);
free(data);
for(i = 0; i < KNUM; ++i)
{
fclose(farray[i]);
}
free(farray);
fclose(fout);
}
int main()
{
time_t start = time(NULL),end,start_read,end_read,start_merge,end_merge;
gen_infile(NUMBER);
end = time(NULL);
printf("gen_infile data time:%f/n", (end - start) * 1000.0/ CLOCKS_PER_SEC);
start_read = time(NULL);k_num_read();
end_read = time(NULL);
printf("k_num_read time:%f/n", (end_read - start_read) * 1000.0/ CLOCKS_PER_SEC);
start_merge = time(NULL);
k_num_merge();
end_merge = time(NULL);
printf("k_num_merge time:%f/n", (end_merge - start_merge) * 1000.0/ CLOCKS_PER_SEC);
end = time(NULL);
printf("total time:%f/n", (end - start) * 1000.0/ CLOCKS_PER_SEC);
return 0;
}
