Thank you to anyone who has already donated - your generous donations helped make three months of treatment possible.
My brother Nate continues to fight stage IV Hodgkin's lymphoma. He's just 31, with a wife and a baby girl. They have no active income (since he's been unable to return to work), no insurance, and cannot afford the treatment he needs. Nate and his family need your help. Please consider a donation; every dollar helps. Thanks.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
// Web Crawler in AjSharp
// http://ajlopez.wordpress.com/category/ajsharp/
//
// NOTE(review): this copy of the listing had been collapsed onto a single line
// and the member headers were missing (each body was a bare block). The member
// names and parameter lists below are restored from the call sites in this
// file; the `sub` / `function` keywords are assumed AjSharp syntax -- confirm
// against the AjSharp version in use.

// Build and launch agents
object WebCrawler
{
    // Creates one Downloader / Harvester / Resolver / Processor set for the
    // given start address, links them together and starts the crawl at depth 0.
    //   url: start address, as a string accepted by System.Uri
    //   fn:  callback handed to the Processor; it is invoked as fn(uri, content)
    sub Process(url, fn)
    {
        uri = new System.Uri(url);

        downloader = new Downloader();
        harvester = new Harvester();
        resolver = new Resolver(uri,5);    // 5 is the maximum link depth
        processor = new Processor(fn);

        // Downloader -> Harvester -> Resolver -> Downloader forms the crawl
        // loop; the Processor receives every downloaded page.
        downloader.Harvester = harvester;
        downloader.Processor = processor;
        harvester.Resolver = resolver;
        resolver.Downloader = downloader;

        downloader.Process(uri, 0);
    }
}

// Downloads a page
agent Downloader
{
    // Fetches the text at uri, then passes it to the Harvester (together with
    // the current depth) and to the Processor.
    sub Process(uri, depth)
    {
        client = new System.Net.WebClient();
        content = client.DownloadString(uri);
        PrintLine("Downloaded: " + uri);
        this.Harvester.Process(uri,depth,content);
        this.Processor.Process(uri, content);
    }
}

// Process the content retrieved
agent Processor
{
    // Stores the callback to run for each downloaded page.
    function Processor(fn)
    {
        this.fn = fn; // function to invoke
    }

    // Invokes the stored callback with the page address and its content.
    sub Process(uri, content)
    {
        // Add your logic
        this.fn(uri, content);
    }
}

// Get links from page
agent Harvester
{
    // Collects the distinct href="..." values found in content and sends each
    // one that starts with "http" to the Resolver at depth+1.
    sub Process(uri, depth, content)
    {
        matches = System.Text.RegularExpressions.Regex.Matches(content, "href=\\s*\"([^&\"]*)\"");
        results = new List();

        // Keep each link value only once per page.
        foreach (match in matches) {
            value = match.Groups[1].Value;

            if (!results.Contains(value))
                results.Add(value);
        }

        // Values that do not start with "http" (e.g. relative links) are skipped.
        foreach (result in results)
            if (result.StartsWith("http"))
                this.Resolver.Process(new System.Uri(result), depth+1);
    }
}

// Filter invalid or already processed links
agent Resolver
{
    // Addresses already forwarded to the Downloader.
    var processed = new List();

    // Remembers the host of the start address and the maximum depth to follow.
    function Resolver(uri, maxdepth)
    {
        this.host = uri.Host;
        this.maxdepth = maxdepth;
    }

    // Forwards uri to the Downloader unless it is too deep, on another host,
    // not http/https, or already seen.
    sub Process(uri, depth)
    {
        if (depth > this.maxdepth)
            return;

        if (uri.Host != this.host)
            return;

        if (uri.Scheme != System.Uri.UriSchemeHttp && uri.Scheme != System.Uri.UriSchemeHttps)
            return;

        if (processed.Contains(uri))
            return;

        processed.Add(uri);
        PrintLine("New Link: " + uri);
        this.Downloader.Process(uri,depth);
    }
}

// Example
WebCrawler.Process("http://ajlopez.wordpress.com", function(uri,content) { PrintLine("From ajlopez.wordpress "+uri);});
WebCrawler.Process("http://ajlopez.zoomblog.com", function(uri,content) { PrintLine("From ajlopez.zoomblog "+uri);});