C#编写的一个反向代理工具,可以缓存网页到本地
转自:http://www.oschina.net/code/snippet_172400_17195
proxy.ashx 主文件
<%@ WebHandler Language="C#" Class="proxy" %>

using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Net;
using System.Text;
using System.Web;

/// <summary>
/// Caching reverse-proxy handler. Requests are forwarded to the upstream site
/// configured in appSettings ("proxyDomain"); when caching is enabled for the
/// request, the upstream response headers and body are stored separately under
/// &lt;proxyCacheFolder&gt;/header/ and &lt;proxyCacheFolder&gt;/body/, in a
/// three-level directory tree derived from a hash of the request URL.
/// </summary>
public class proxy : IHttpHandler
{
    /// <summary>Cache lifetime used when neither the request nor the configuration supplies a valid one.</summary>
    const int DefaultCacheSeconds = 3600;

    /// <summary>Application-state key of the list of hashes whose cache entry is currently being refreshed.</summary>
    const string InFlightKey = "CacheList";

    /// <summary>Guards every read and write of the in-flight list shared by concurrent requests.</summary>
    static readonly object inFlightLock = new object();

    // Per-request state; the handler is not reusable (see IsReusable), so a
    // fresh instance carries these for exactly one request.
    HttpResponse Response;
    HttpRequest Request;
    HttpApplicationState Application;
    HttpServerUtility Server;

    static string proxyCacheFolder = ConfigurationManager.AppSettings["proxyCacheFolder"];
    static string proxyDomain = ConfigurationManager.AppSettings["proxyDomain"];
    static string proxyReferer = ConfigurationManager.AppSettings["proxyReferer"];
    bool proxyCacheDirectAccess = ConfigurationManager.AppSettings["proxyCacheDirectAccess"] == "true";
    int proxyCacheSeconds = ReadConfiguredCacheSeconds();

    /// <summary>
    /// Entry point. A URL containing "?del" removes that URL's cache entry;
    /// otherwise the request is served through the cache when the (rewritten)
    /// query string carries cache=true, or relayed straight from upstream.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    public void ProcessRequest(HttpContext context)
    {
        Response = context.Response;
        Request = context.Request;
        Application = context.Application;
        Server = context.Server;

        string path = context.Request.RawUrl;

        // NOTE(review): cache deletion is reachable by any client; consider restricting it.
        if (path.IndexOf("?del", StringComparison.Ordinal) > 0)
        {
            DeleteCacheFile(path.Replace("?del", string.Empty));
            return;
        }

        // A valid per-request "seconds" overrides the configured lifetime.
        // Parse into a local so a failed parse does not clobber the configured value.
        int requestedSeconds;
        if (int.TryParse(Request.QueryString["seconds"], out requestedSeconds))
        {
            proxyCacheSeconds = requestedSeconds;
        }

        if (Request.QueryString["cache"] == "true")
        {
            EchoData(path);
        }
        else
        {
            Response.BinaryWrite(DownloadAndCopyHeaders(path, null));
        }
    }

    /// <summary>
    /// Reads "proxyCacheSeconds" from appSettings, falling back to
    /// <see cref="DefaultCacheSeconds"/> when it is missing or not an integer.
    /// </summary>
    static int ReadConfiguredCacheSeconds()
    {
        int seconds;
        if (int.TryParse(ConfigurationManager.AppSettings["proxyCacheSeconds"], out seconds))
        {
            return seconds;
        }
        return DefaultCacheSeconds;
    }

    /// <summary>
    /// Joins the configured upstream base URL and the request path without
    /// producing a double slash when the base ends with "/" and the path starts with one.
    /// </summary>
    static string BuildUpstreamUrl(string path)
    {
        if (proxyDomain.EndsWith("/", StringComparison.Ordinal) && path.StartsWith("/", StringComparison.Ordinal))
        {
            return proxyDomain + path.Substring(1);
        }
        return proxyDomain + path;
    }

    /// <summary>
    /// Downloads <paramref name="path"/> from upstream, copies the upstream
    /// response headers onto the current response and returns the body.
    /// </summary>
    /// <param name="path">Raw request URL (path and query) to fetch upstream.</param>
    /// <param name="headerLog">
    /// When not null, receives one "name:value" line per upstream header so the
    /// caller can persist them next to the cached body.
    /// </param>
    /// <returns>The upstream response body.</returns>
    byte[] DownloadAndCopyHeaders(string path, StringBuilder headerLog)
    {
        using (WebClient wc = new WebClient())
        {
            wc.Headers.Set("Referer", proxyReferer);
            byte[] data = wc.DownloadData(BuildUpstreamUrl(path));

            Response.ContentType = wc.ResponseHeaders["Content-Type"];
            foreach (string key in wc.ResponseHeaders.AllKeys)
            {
                string value = wc.ResponseHeaders[key];
                if (headerLog != null)
                {
                    headerLog.Append(key);
                    headerLog.Append(":");
                    headerLog.Append(value);
                    headerLog.Append("\r\n");
                }
                Response.Headers.Set(key, value);
            }
            return data;
        }
    }

    /// <summary>
    /// Deletes every file in <paramref name="d"/> that has not been accessed
    /// for more than <c>proxyCacheSeconds</c> seconds.
    /// </summary>
    /// <param name="d">Cache directory to purge; ignored when it does not exist.</param>
    void ClearTimeoutCache(DirectoryInfo d)
    {
        if (!d.Exists)
        {
            return;
        }

        foreach (FileInfo file in d.GetFiles())
        {
            // Eviction is keyed on last ACCESS time, whereas freshness (see
            // EchoCacheFile) is keyed on last WRITE time.
            // NOTE(review): this presumably relies on the file system tracking
            // last-access timestamps — confirm on the target server.
            TimeSpan idle = DateTime.Now - file.LastAccessTime;
            if (idle.TotalSeconds <= proxyCacheSeconds)
            {
                continue;
            }

            try
            {
                file.Delete();
            }
            catch (IOException)
            {
                // The file is in use by another request. The purge is
                // best-effort housekeeping, so leave it for a later pass
                // instead of failing the current request.
            }
        }
    }

    /// <summary>
    /// Builds the relative cache sub-directory for a hash: its first three
    /// characters, each followed by "/".
    /// </summary>
    string GetCacheFolderPath(string hash)
    {
        StringBuilder folder = new StringBuilder(6);
        for (int i = 0; i <= 2; i++)
        {
            folder.Append(hash[i]);
            folder.Append('/');
        }
        return folder.ToString();
    }

    /// <summary>
    /// Reads the cached "name:value" header lines and replays them on the response.
    /// </summary>
    /// <param name="cacheHeaderPath">Path of the cached header file.</param>
    void EchoCacheHeader(string cacheHeaderPath)
    {
        foreach (string line in File.ReadAllLines(cacheHeaderPath))
        {
            // Split on the FIRST colon only: header values such as dates and
            // URLs legitimately contain colons and must not be discarded.
            int colon = line.IndexOf(':');
            if (colon <= 0)
            {
                continue;
            }

            string name = line.Substring(0, colon);
            string value = line.Substring(colon + 1);

            if (name == "Content-Type")
            {
                Response.ContentType = value;
            }
            Response.Headers.Set(name, value);
        }
    }

    /// <summary>
    /// Removes the cached header and body files for <paramref name="path"/>
    /// (if present) and writes a confirmation message to the response.
    /// </summary>
    /// <param name="path">Request URL whose cache entry should be removed.</param>
    void DeleteCacheFile(string path)
    {
        string absFolder = Server.MapPath(proxyCacheFolder);
        string hash = GetHashString(path);
        string folder = GetCacheFolderPath(hash);
        string cacheBodyPath = absFolder + "/body/" + folder + hash;
        string cacheHeaderPath = absFolder + "/header/" + folder + hash;

        if (File.Exists(cacheBodyPath))
        {
            File.Delete(cacheBodyPath);
        }
        if (File.Exists(cacheHeaderPath))
        {
            File.Delete(cacheHeaderPath);
        }

        // The path comes straight from the request URL; encode it so it cannot
        // inject markup into the response.
        Response.Write("delete cache file Success!\r\n" + HttpUtility.HtmlEncode(path));
    }

    /// <summary>
    /// Serves a response from the cache, after purging idle entries from the
    /// two directories involved.
    /// </summary>
    /// <param name="cacheHeaderPath">Path of the cached header file.</param>
    /// <param name="cacheBodyPath">Path of the cached body file.</param>
    /// <param name="ifTimeout">
    /// When true, the entry is only served if the body was written less than
    /// <c>proxyCacheSeconds</c> seconds ago; when false its age is ignored.
    /// </param>
    /// <returns>True when the cached response was sent; otherwise false.</returns>
    bool EchoCacheFile(string cacheHeaderPath, string cacheBodyPath, bool ifTimeout)
    {
        FileInfo cacheBody = new FileInfo(cacheBodyPath);
        FileInfo cacheHeader = new FileInfo(cacheHeaderPath);

        ClearTimeoutCache(cacheBody.Directory);
        ClearTimeoutCache(cacheHeader.Directory);

        if (!cacheBody.Exists || !cacheHeader.Exists)
        {
            return false;
        }

        if (ifTimeout)
        {
            TimeSpan age = DateTime.Now - cacheBody.LastWriteTime;
            if (age.TotalSeconds >= proxyCacheSeconds)
            {
                return false;
            }
        }

        EchoCacheHeader(cacheHeaderPath);
        Response.TransmitFile(cacheBodyPath);
        return true;
    }

    /// <summary>
    /// Marks <paramref name="hash"/> as being refreshed by the current request.
    /// </summary>
    /// <returns>False when another request is already refreshing the same entry.</returns>
    bool TryBeginUpdate(string hash)
    {
        lock (inFlightLock)
        {
            List<string> inFlight = Application[InFlightKey] as List<string>;
            if (inFlight == null)
            {
                inFlight = new List<string>(1000);
                Application[InFlightKey] = inFlight;
            }

            if (inFlight.Contains(hash))
            {
                return false;
            }

            inFlight.Add(hash);
            return true;
        }
    }

    /// <summary>Clears the "being refreshed" mark set by <see cref="TryBeginUpdate"/>.</summary>
    void EndUpdate(string hash)
    {
        lock (inFlightLock)
        {
            List<string> inFlight = Application[InFlightKey] as List<string>;
            if (inFlight != null)
            {
                inFlight.Remove(hash);
            }
        }
    }

    /// <summary>
    /// Serves <paramref name="path"/> through the cache: from a fresh cache
    /// entry when there is one, otherwise by downloading from upstream and
    /// storing the result. While one request refreshes an entry, concurrent
    /// requests for the same URL get the existing entry (if any) or an
    /// uncached pass-through.
    /// </summary>
    /// <param name="path">Raw request URL (path and query).</param>
    void EchoData(string path)
    {
        string absFolder = Server.MapPath(proxyCacheFolder);
        string hash = GetHashString(path);
        string folder = GetCacheFolderPath(hash);
        string bodyFolder = absFolder + "/body/" + folder;
        string headerFolder = absFolder + "/header/" + folder;
        string cacheBodyPath = bodyFolder + hash;
        string cacheHeaderPath = headerFolder + hash;

        if (proxyCacheDirectAccess)
        {
            // Direct-access mode: never contact upstream, never check freshness.
            if (!EchoCacheFile(cacheHeaderPath, cacheBodyPath, false))
            {
                Response.Write("直接从缓存读取失败!");
            }
            return;
        }

        if (EchoCacheFile(cacheHeaderPath, cacheBodyPath, true))
        {
            return;
        }

        if (!TryBeginUpdate(hash))
        {
            // Another request is already refreshing this entry: serve whatever
            // is cached regardless of age, or relay from upstream without caching.
            if (!EchoCacheFile(cacheHeaderPath, cacheBodyPath, false))
            {
                Response.BinaryWrite(DownloadAndCopyHeaders(path, null));
            }
            return;
        }

        byte[] data;
        try
        {
            StringBuilder headerLog = new StringBuilder();
            data = DownloadAndCopyHeaders(path, headerLog);

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(headerFolder);
            File.WriteAllText(cacheHeaderPath, headerLog.ToString().Trim());

            Directory.CreateDirectory(bodyFolder);
            File.WriteAllBytes(cacheBodyPath, data);
        }
        finally
        {
            // Always release the mark; otherwise one failed download or write
            // would stop this URL from ever being cached again.
            EndUpdate(hash);
        }

        Response.BinaryWrite(data);
    }

    /// <summary>Returns the cache key (file name) for a request URL.</summary>
    string GetHashString(string path)
    {
        return GetMd5Str(path);
    }

    /// <summary>
    /// Returns 16 upper-case hex characters: bytes 4..11 of the MD5 digest of
    /// <paramref name="ConvertString"/>.
    /// </summary>
    static string GetMd5Str(string ConvertString)
    {
        // Encoding.Default is the system default encoding, NOT UTF-8. It is
        // kept deliberately so that existing cache file names stay valid.
        byte[] input = Encoding.Default.GetBytes(ConvertString);

        using (System.Security.Cryptography.MD5CryptoServiceProvider md5 = new System.Security.Cryptography.MD5CryptoServiceProvider())
        {
            string hex = BitConverter.ToString(md5.ComputeHash(input), 4, 8);
            return hex.Replace("-", "");
        }
    }

    /// <summary>
    /// Always false: the handler keeps per-request state in instance fields,
    /// so an instance must not be shared between requests.
    /// </summary>
    public bool IsReusable
    {
        get { return false; }
    }
}
web.config
<?xml version="1.0"?>
<configuration>
  <configSections>
    <section name="RewriterConfig" type="URLRewriter.Config.RewriterConfigSerializerSectionHandler, URLRewriter"/>
  </configSections>
  <!--
    Rewrite rules are listed most-specific first. URLRewriter is expected to apply
    the first rule whose LookFor pattern matches (verify against the URLRewriter
    build in use), so the catch-all rule must come last; otherwise it shadows the
    ~/ajax/ rule and ajax responses end up being cached.
  -->
  <RewriterConfig>
    <Rules>
      <RewriterRule>
        <LookFor>~/ajax/.*$</LookFor>
        <SendTo> <!--cache=false: responses under this path must not be cached--> <![CDATA[~/proxy.ashx?cache=false]]> </SendTo>
      </RewriterRule>
      <RewriterRule>
        <LookFor>~/.*$</LookFor>
        <SendTo> <!--cache=true: responses under this path are cached--> <![CDATA[~/proxy.ashx?cache=true&seconds=30]]> </SendTo>
      </RewriterRule>
    </Rules>
  </RewriterConfig>
  <appSettings>
    <!--# reverse proxy settings: start-->
    <!--Upstream site that requests are forwarded to-->
    <add key="proxyDomain" value="http://127.0.0.1:12123/"/>
    <!--Cache folder (virtual path)-->
    <add key="proxyCacheFolder" value="/proxyCache/"/>
    <!--Cache lifetime in seconds, used when the request does not supply "seconds"-->
    <add key="proxyCacheSeconds" value="3600"/>
    <!--When true, skip the cache freshness check and always read directly from the cache-->
    <add key="proxyCacheDirectAccess" value="false"/>
    <!--Referer header sent to the upstream site-->
    <add key="proxyReferer" value="http://www.www.com/"/>
    <!--# reverse proxy settings: end-->
  </appSettings>
  <system.webServer>
    <modules runAllManagedModulesForAllRequests="true">
      <add type="URLRewriter.ModuleRewriter, URLRewriter" name="ModuleRewriter"/>
    </modules>
  </system.webServer>
  <system.web>
    <!--NOTE(review): debug="true" is intended for development; consider disabling it in production-->
    <compilation debug="true"/>
  </system.web>
</configuration>