forked from KEMT/zpwiki
		
	Upload files to 'pages/students/2016/jan_holp/dp2021/dp2021 zdrojove subory'
upravena funkcia map
This commit is contained in:
		
							parent
							
								
									7dd72f0760
								
							
						
					
					
						commit
						4989384544
					
				| @ -0,0 +1,143 @@ | ||||
| //Jan Holp, DP 2021
 | ||||
| 
 | ||||
| 
 | ||||
| //client2 = cassandra
 | ||||
| //client1  = elasticsearch 
 | ||||
| //-----------------------------------------------------------------
 | ||||
| 
 | ||||
| //require for PageRank 
 | ||||
| var  Pagerank = require('../lib/pagerank') | ||||
| var fs = require('fs') | ||||
| var path = require('path') | ||||
| 
 | ||||
| //require the Elasticsearch library
 | ||||
| const elasticsearch = require('elasticsearch'); | ||||
// Elasticsearch client (client1) talking to the local node.
const client1 = new elasticsearch.Client({ hosts: ['localhost:9200'] });

// Startup health check: ping the cluster with a 30 s request timeout and
// report the outcome on the console.
client1.ping({ requestTimeout: 30000 }, (pingError) => {
  if (!pingError) {
    console.log('ELasticSearch is ok');
    return;
  }
  // The ping failed, so the Elasticsearch service needs to be checked.
  console.error('Elasticsearch cluster is down!');
});
| 
 | ||||
| //create new index - skweb2
 | ||||
// Ask Elasticsearch to create the 'skweb2' index and log either the error or
// the creation response.
client1.indices.create({ index: 'skweb2' }, (createError, createResponse, _status) => {
  if (!createError) {
    console.log("created a new index", createResponse);
    return;
  }
  console.log(createError);
});
| 
 | ||||
| //indexing method
 | ||||
/**
 * Indexes a batch of documents into Elasticsearch with a single bulk request.
 *
 * For every document an `index` action line is emitted, using the document's
 * `target_link` (its URL) as the document `_id`, followed by the document
 * itself.
 *
 * @param {string} index - Name of the target index.
 * @param {string} type - Mapping type to index the documents under.
 * @param {Array<Object>} data - Documents to index; each should carry `target_link`.
 * @returns {Promise<void>} Settles once the bulk request has finished and its
 *   outcome has been logged. Request failures are logged, not rethrown.
 */
const bulkIndex = function bulkIndex(index, type, data) {
  // Nothing to send: avoid issuing a bulk request with an empty body.
  if (data == null || data.length === 0) {
    console.log('Nothing to index: no items supplied');
    return Promise.resolve();
  }

  const bulkBody = [];
  data.forEach(item => {
    bulkBody.push(
      { index: { _index: index, _type: type, _id: item.target_link } }, // document id is the URL
      item
    );
  });
  console.log(bulkBody);

  return client1.bulk({ body: bulkBody })
    .then(response => {
      // Must be mutable: it is incremented for every item that failed.
      let errorCount = 0;
      response.items.forEach(item => {
        if (item.index && item.index.error) {
          console.log(++errorCount, item.index.error);
        }
      });
      console.log(
        `Successfully indexed ${data.length - errorCount} out of ${data.length} items`
      );
    })
    .catch(err => {
      // Report transport/request failures instead of leaving the rejection unhandled.
      console.error(err);
    });
};
| 
 | ||||
| 
 | ||||
| const cassandra = require('cassandra-driver'); | ||||
// Cassandra client (client2) for the 'websucker' keyspace on the local node.
const client2 = new cassandra.Client({
  contactPoints: ['localhost:9042'],
  localDataCenter: 'datacenter1',
  keyspace: 'websucker',
});

// Selects the names of all domains that have at least one good page.
const query1 =
  'SELECT domain_name FROM websucker.domain_quality ' +
  'WHERE good_count > 0 ALLOW FILTERING';
| //const query2 = 'SELECT * from websucker.content WHERE domain_name = ' + domain_name[i] + 'ALLOW FILTERING'; // body_size > 0
 | ||||
| 
 | ||||
| //-------------------------------------------------------------------------
 | ||||
| 
 | ||||
// PageRank settings.
const tolerance = 1e-4; // sensitivity for accuracy of convergence
const linkProb = 0.85;  // high numbers are more stable (PageRank)

// Shared, mutable module-level collections.
var object_list = [];   // array holding all documents for the individual domain names
var domain_name = [];   // array of domain names
| 
 | ||||
| 
 | ||||
// Main flow: select every domain name, store the names in `domain_name`, then
// for each domain select its non-empty articles and bulk-index them into the
// 'skweb2' Elasticsearch index.
client2.execute(query1)
  .then(result => {
    // forEach, not map: the rows are only used to fill the shared array.
    result.rows.forEach(row => {
      domain_name.push(row.domain_name);
    });
    console.log("Vsetky domenove mena : " , domain_name);

    // The domain name is bound as a parameter instead of being concatenated
    // into the CQL text, so names containing quotes cannot break the query.
    const contentQuery =
      'SELECT * from websucker.content WHERE domain_name = ? and body_size > 0 ALLOW FILTERING';

    domain_name.forEach(name => {
      client2.execute(contentQuery, [name], { prepare: true })
        .then(res => {
          // Keep only the fields that are indexed. A per-document PageRank
          // value is not computed here (it was left disabled).
          const documents = res.rows.map(rr => ({
            domain_name: rr.domain_name,
            title: rr.title,
            body: rr.body,
            links: rr.links,
            target_link: rr.target_link,
          }));

          // Still published on the shared variable for any other reader, but
          // the local value is what gets indexed, so concurrent per-domain
          // callbacks cannot hand each other's rows to bulkIndex.
          object_list = documents;
          return bulkIndex('skweb2', 'web_page', documents);
        })
        .catch(error => console.log(error));
    });
  })
  .catch(err => console.log(err));
| 
 | ||||
| // Call of the function that computes PageRank, and the variables defined for it
 | ||||
| 
 | ||||
| //Larger numbers (0.85) //var linkProb = 0.85;    
 | ||||
| //accuracy at which we terminate 
 | ||||
| //--------------Pagerank 
 | ||||
| //	const linkProb = 0.85;    // high numbers are more stable (Pagerank)
 | ||||
| //	const tolerance = 0.0001;  // sensitivity for accuracy of convergence
 | ||||
|    /* | ||||
| 	var nodeMatrix = [ | ||||
|         [object_list.links] | ||||
|     ]; | ||||
| */ | ||||
| /* | ||||
| 
 | ||||
| const PR = function PR(nodeMatrix,linkProb,tolerance){ | ||||
| 
 | ||||
| Pagerank(nodeMatrix, linkProb, tolerance, function (err, res) { | ||||
| 
 | ||||
| 
 | ||||
|     return res; | ||||
|     //console.log(res);
 | ||||
|             | ||||
| }); | ||||
| } | ||||
| */ | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user