// Jan Holp, DP 2021
// client1 = Elasticsearch
// client2 = Cassandra
// -----------------------------------------------------------------
'use strict';

// Require the Elasticsearch library.
const elasticsearch = require('elasticsearch');

const client1 = new elasticsearch.Client({
  hosts: ['localhost:9200'],
});

// Health check: report whether the Elasticsearch node answers within 30 s.
client1.ping({ requestTimeout: 30000 }, function (error) {
  if (error) {
    // At this point Elasticsearch is down; check the Elasticsearch service.
    console.error('Elasticsearch cluster is down!');
  } else {
    console.log('ELasticSearch is ok');
  }
});

// Create the new index skweb2. A failure is only logged so the script
// keeps going.
client1.indices.create({ index: 'skweb2' }, function (error, response) {
  if (error) {
    console.log(error);
  } else {
    console.log("created a new index", response);
  }
});

/**
 * Index a list of documents into Elasticsearch with a single bulk request.
 *
 * Documents receive sequential numeric ids starting at 1, in the order they
 * appear in `data`.
 *
 * @param {string} index - Name of the target index.
 * @param {string} type - Mapping type written into each bulk action.
 * @param {Array<Object>} data - Documents to index.
 * @returns {Promise<void>} Settles once the bulk request has finished.
 *   Request failures are logged, not rethrown.
 */
const bulkIndex = function bulkIndex(index, type, data) {
  // A bulk request needs at least one action, so skip the round trip.
  if (data.length === 0) {
    return Promise.resolve();
  }

  const bulkBody = [];
  let id = 1;
  let errorCount = 0;

  data.forEach(item => {
    // Each document is preceded by its action/metadata entry.
    bulkBody.push({
      index: {
        _index: index,
        _type: type,
        _id: id++, // TODO: change the id to the URL
      },
    });
    bulkBody.push(item);
  });

  return client1.bulk({ body: bulkBody })
    .then(response => {
      response.items.forEach(item => {
        if (item.index && item.index.error) {
          console.log(++errorCount, item.index.error);
        }
      });
      console.log(
        `Successfully indexed ${data.length - errorCount} out of ${data.length} items`
      );
    })
    .catch(error => console.error(error));
};

const cassandra = require('cassandra-driver');

const client2 = new cassandra.Client({
  contactPoints: ['localhost:9042'],
  localDataCenter: 'datacenter1',
  keyspace: 'websucker',
});

// Domains that have at least one good page.
const query1 = 'SELECT domain_name FROM websucker.domain_quality WHERE good_count > 0 ALLOW FILTERING';
// Pages of one domain with a non-empty body. The domain name is bound as a
// parameter instead of being concatenated into the CQL text.
const query2 = 'SELECT * from websucker.content WHERE domain_name = ? and body_size > 0 ALLOW FILTERING';

// -------------------------------------------------------------------------

const domain_name = [];
const object_list = [];

client2.execute(query1)
  .then(result => {
    result.rows.forEach(row => {
      domain_name.push(row.domain_name);
    });
    console.log("Vsetky domenove mena : ", domain_name);

    // Fetch the pages of every domain in parallel. A failing domain is
    // logged and skipped so it does not abort the others.
    const pendingQueries = domain_name.map(name =>
      client2.execute(query2, [name], { prepare: true })
        .then(res => {
          res.rows.forEach(rr => {
            object_list.push({
              domain_name: rr.domain_name,
              title: rr.title,
              body: rr.body,
              target_link: rr.target_link,
              id: rr.target_link,
            });
          });
        })
        .catch(error => console.log(error))
    );

    return Promise.all(pendingQueries);
  })
  // Index once, after all rows are collected, instead of re-sending the
  // growing list for every single row.
  .then(() => bulkIndex('skweb2', 'web_page', object_list))
  .catch(err => console.log(err));

// Dump the collected documents after 15 seconds.
setTimeout(() => console.log(object_list), 15000);