Commit edd87588 authored by aye
Browse files

Implemented basic search, alloc space issues

parent 7fb1c10e
No related merge requests found
Showing with 100 additions and 0 deletions
+100 -0
const https = require('https')
// Number of search_link chains started side by side for each level.
const REQ_LIMIT = 10

// The two article names come straight from the command line.
const source = process.argv[2]
const target = process.argv[3]
if (!source || !target) {
  throw new Error('Usage: node find-path.js source target')
}

// TODO: find the shortest path from `source` to `target`
// TODO: check source and target links to make sure they are real
const source_link = `https://en.wikipedia.org/wiki/${source}`
const target_link = `https://en.wikipedia.org/wiki/${target}`

// Page URL -> the URL stored alongside it when the page was taken off the frontier.
let visited = new Map()
// Frontier being processed now; entries are [url, parent url]. Seeded with the source page.
let curr_checking = [[source_link, source_link]]
// Frontier collected for the following level.
let next_checking = []
/**
 * Extract article links from a page's HTML.
 *
 * Only anchors written literally as `<a href="..."` are recognised. An href is
 * kept when it starts with `/wiki/` and contains no `:` (so hrefs such as
 * `/wiki/File:...` are dropped). Any `#fragment` is removed, and each
 * resulting URL is returned once, in order of first appearance, so a page
 * that links to the same article repeatedly does not inflate the frontier.
 *
 * @param {string} contents - Raw HTML of a page.
 * @returns {string[]} Absolute `https://en.wikipedia.org/wiki/...` URLs without duplicates.
 */
const find_wiki_url = (contents) => {
  const unique = new Set()
  // Every chunk after the first begins right after an opening `<a href="`.
  for (const chunk of contents.split('<a href="').slice(1)) {
    const href = chunk.split('"')[0]
    if (href.startsWith('/wiki/') && !href.includes(':')) {
      unique.add('https://en.wikipedia.org' + href.split('#')[0])
    }
  }
  return [...unique]
}
/**
 * Print the path that was found, one article title per line, then exit.
 *
 * The path is rebuilt by following parent links in `visited` from
 * `target_link` back towards `source_link`. It is printed from source to
 * target and includes both endpoints (the source is printed once when it is
 * also the target).
 *
 * Reads the module-level `visited`, `source_link` and `target_link`.
 * Terminates the process via `process.exit()`.
 */
const post_process = () => {
  const WIKI_PREFIX = 'https://en.wikipedia.org/wiki/'

  // Collected target-first, reversed before printing.
  const path = [target_link]
  let parent = visited.get(target_link)
  while (parent && parent !== source_link) {
    path.push(parent)
    parent = visited.get(parent)
  }
  if (target_link !== source_link) {
    path.push(source_link)
  }
  path.reverse()

  for (const url of path) {
    // Print only the title part; the text before the prefix is always empty.
    const page = url.startsWith(WIKI_PREFIX) ? url.slice(WIKI_PREFIX.length) : url
    console.log(page)
  }
  process.exit()
}
/**
 * One worker of the level-by-level search.
 *
 * Pops `[url, parent]` entries from `curr_checking` until it finds one that
 * has not been visited yet, fetches that page, and queues every link found on
 * it into `next_checking` with the fetched page recorded as the link's parent.
 * It then calls itself again, so one call keeps working until the current
 * frontier is empty.
 *
 * If a popped entry is the target, the parent is stored and `post_process()`
 * is called to report the result.
 *
 * A failed request is logged and skipped so the worker keeps going instead of
 * crashing the process or leaving its promise pending.
 *
 * @param {Function} resolve - Called with no arguments once `curr_checking`
 *   is exhausted (or after the target has been handled).
 */
const search_link = (resolve) => {
  let next_link, parent
  do {
    const entry = curr_checking.pop()
    if (!entry) {
      // Nothing left on this level for this worker.
      resolve()
      return
    }
    next_link = entry[0]
    parent = entry[1]
    if (next_link === target_link) {
      console.log('yay!')
      visited.set(next_link, parent)
      post_process()
      resolve()
      return
    }
  } while (visited.has(next_link))
  //console.log(next_link)
  visited.set(next_link, parent)

  // Freeze the page being expanded; it is the parent of every link found on it.
  const current = next_link

  // Continue with the next entry exactly once, whichever event fires first.
  let continued = false
  const proceed = () => {
    if (continued) return
    continued = true
    search_link(resolve)
  }

  https.get(current, res => {
    res.setEncoding('utf8')
    let data = ''
    res.on('data', chunk => {
      data += chunk
    })
    res.on('end', () => {
      for (const url of find_wiki_url(data)) {
        // Already-expanded pages would be skipped when popped anyway.
        if (!visited.has(url)) {
          next_checking.push([url, current])
        }
      }
      proceed()
    })
    res.on('error', err => {
      console.error(`response failed for ${current}: ${err.message}`)
      proceed()
    })
  }).on('error', err => {
    console.error(`request failed for ${current}: ${err.message}`)
    proceed()
  })
}
/**
 * Launch REQ_LIMIT search_link workers at once.
 *
 * Each worker gets the `resolve` function of its own promise, so a promise
 * settles when that worker calls it.
 *
 * @returns {Promise[]} One promise per started worker.
 */
const parallel_search = () =>
  Array.from({ length: REQ_LIMIT }, () => new Promise((done) => {
    search_link(done)
  }))
/**
 * Process the current frontier, then move on to the next one.
 *
 * Waits for every worker from `parallel_search()` to finish, logs the size of
 * the collected next frontier (total entries and distinct URLs), promotes
 * `next_checking` to `curr_checking`, and repeats. The search stops when the
 * next frontier is empty, since no further pages remain to expand; without
 * that check the function would call itself forever.
 *
 * @returns {Promise} Settles once the search has run out of pages.
 */
const search_level = () => {
  return Promise.all(parallel_search()).then(() => {
    console.log('next', next_checking.length, (new Set(next_checking.map(x => x[0]))).size)
    curr_checking = next_checking
    next_checking = []
    if (curr_checking.length === 0) {
      // Nothing left to expand: the target cannot be reached from the source.
      console.log('No path found')
      return undefined
    }
    return search_level()
  })
}
// Kick off the search; curr_checking already holds the source page.
search_level()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment