//Jan Holp, DP 2021


//client2 = cassandra
//client1  = elasticsearch 
//-----------------------------------------------------------------

//require for PageRank 
var  Pagerank = require('../lib/pagerank')
var fs = require('fs')
var path = require('path')

//require the Elasticsearch library
const elasticsearch = require('elasticsearch');
// Elasticsearch client talking to the node at localhost:9200.
const client1 = new elasticsearch.Client({ hosts: ['localhost:9200'] });

// Startup health check: ping the cluster (requestTimeout 30000) and report
// the outcome on the console.
client1.ping({ requestTimeout: 30000 }, (error) => {
    if (!error) {
        console.log('ELasticSearch is ok');
        return;
    }
    // The ping failed: the Elasticsearch service should be checked.
    console.error('Elasticsearch cluster is down!');
});

// Ask Elasticsearch to create the 'skweb2' index; log the error if the
// request fails, otherwise log the response.
client1.indices.create({ index: 'skweb2' }, (error, response) => {
    if (error) {
        console.log(error);
        return;
    }
    console.log("created a new index", response);
});

/**
 * Bulk-indexes documents into Elasticsearch through `client1`.
 *
 * For every document an `index` action is emitted followed by the document
 * itself; the document id is its URL (`target_link`).
 *
 * @param {string} index - Name of the target index.
 * @param {string} type - Value written to each action's `_type`.
 * @param {Array<Object>} data - Documents to index; each is expected to carry
 *   a `target_link` property.
 * @returns {Promise<number>} Resolves with the number of documents indexed
 *   without an item-level error. Resolves with 0 when `data` is empty or when
 *   the bulk request itself fails (the failure is logged, never rethrown, so
 *   fire-and-forget callers do not produce unhandled rejections).
 */
const bulkIndex = function bulkIndex(index, type, data) {
	// Nothing to send: skip the request instead of posting an empty bulk body.
	if (!data || data.length === 0) {
		return Promise.resolve(0);
	}

	const bulkBody = [];
	data.forEach(item => {
		bulkBody.push({
			index: {
				_index: index,
				_type: type,
				_id: item.target_link,  // document id is the page URL
			},
		});
		bulkBody.push(item);
	});

	return client1.bulk({ body: bulkBody })
		.then(response => {
			// Must be mutable: it is incremented once per failed item.
			let errorCount = 0;
			(response.items || []).forEach(item => {
				if (item.index && item.index.error) {
					errorCount += 1;
					console.error(errorCount, item.index.error);
				}
			});

			const indexedCount = data.length - errorCount;
			console.log(`Successfully indexed ${indexedCount} out of ${data.length} items`);
			return indexedCount;
		})
		.catch(error => {
			// Request-level failure (e.g. cluster unreachable): report it.
			console.error(error);
			return 0;
		});
};


const cassandra = require('cassandra-driver');
// Cassandra client: contact point localhost:9042, data center 'datacenter1',
// default keyspace 'websucker'.
const client2 = new cassandra.Client({
    contactPoints: ['localhost:9042'],
    localDataCenter: 'datacenter1',
    keyspace: 'websucker',
});

// CQL that selects the name of every domain whose good_count is above zero.
const query1 = 'SELECT domain_name FROM websucker.domain_quality WHERE good_count > 0 ALLOW FILTERING';
//const query2 = 'SELECT * from websucker.content WHERE domain_name = ' + domain_name[i] + 'ALLOW FILTERING'; // body_size > 0

//-------------------------------------------------------------------------

// Names of the domains read from Cassandra; filled by pushing, never reassigned here.
const domain_name = [];
// Documents fetched for a domain; reassigned each time a domain's rows arrive.
let object_list = [];

// PageRank parameters.
const linkProb = 0.85;      // high numbers are more stable
const tolerance = 0.0001;   // sensitivity for accuracy of convergence


// Pipeline: select all domain names, then for each domain fetch its non-empty
// documents from websucker.content and hand them to bulkIndex.
client2.execute(query1)
  .then(result => {
    // forEach rather than map: the rows are only used to fill domain_name.
    result.rows.forEach(row => {
      domain_name.push(row.domain_name);
    });
    console.log("Vsetky domenove mena : " , domain_name);

    // The domain name is bound as a query parameter instead of being
    // concatenated into the CQL text, so names containing quotes can neither
    // break nor alter the statement.
    const contentQuery = 'SELECT * FROM websucker.content WHERE domain_name = ? AND body_size > 0 ALLOW FILTERING';

    // NOTE(review): execute() presumably resolves with a single page of rows
    // (driver fetchSize); very large domains may need explicit paging — confirm.
    domain_name.forEach(name => {
      client2.execute(contentQuery, [name], { prepare: true })
        .then(res => {
          const documents = res.rows.map(row => ({
            domain_name: row.domain_name,
            title: row.title,
            body: row.body,
            links: row.links,
            target_link: row.target_link,
            // TODO: a per-document pagerank field was sketched here but is not computed yet.
          }));

          // Kept for compatibility: object_list still holds the most recently
          // fetched domain's documents, as it did before.
          object_list = documents;

          // A domain without non-empty documents has nothing to index.
          if (documents.length === 0) {
            return;
          }
          bulkIndex('skweb2', 'web_page', documents);
        })
        .catch(error => console.error(error));
    });
  })
  .catch(err => console.error(err));

// Call of the PageRank function and the variables it uses (currently disabled; see the commented-out code below)

//Larger numbers (0.85) //var linkProb = 0.85;    
//accuracy at which we terminate 
//--------------Pagerank 
//	const linkProb = 0.85;    // high numbers are more stable (Pagerank)
//	const tolerance = 0.0001;  // sensitivity for accuracy of convergence
   /*
	var nodeMatrix = [
        [object_list.links]
    ];
*/
/*

const PR = function PR(nodeMatrix,linkProb,tolerance){

Pagerank(nodeMatrix, linkProb, tolerance, function (err, res) {


    return res;
    //console.log(res);
           
});
}
*/