💠 Add content to => 487.Identifying Byte Slices
sinalalebakhsh committed Nov 22, 2023
1 parent 79bc7a5 commit 9939b70
Showing 7 changed files with 251 additions and 66 deletions.
61 changes: 21 additions & 40 deletions Crawl/Crawler.go

@@ -8,38 +8,20 @@ Terminal:
 */
 
 package crawl
 
 import (
 	"fmt"
+	"golang.org/x/net/html"
 	"net/http"
-	"os"
-	"time"
-	"golang.org/x/net/html"
 )
 
 // Crawl function takes a URL and recursively crawls the pages
-func Crawl(url string, depth int, searchDir string) {
+func Crawl(url string, depth int) {
 	if depth <= 0 {
 		return
 	}
 
-	// Create a directory for the search
-	today := time.Now().Format("2006-01-02")
-	searchDir = fmt.Sprintf("%s-%s", today, searchDir)
-
-	err := os.Mkdir(searchDir, 0755)
-	if err != nil {
-		fmt.Println("Error creating directory:", err)
-		return
-	}
-
-	// Create a file to store search results
-	resultFile, err := os.Create(fmt.Sprintf("%s/searchResult.txt", searchDir))
-	if err != nil {
-		fmt.Println("Error creating result file:", err)
-		return
-	}
-	defer resultFile.Close()
-
 	// Make an HTTP request
 	resp, err := http.Get(url)
 	if err != nil {
@@ -55,35 +37,34 @@ func Crawl(url string, depth int, searchDir string) {
 		return
 	}
 
-	// Process the links on the current page and write them to the result file
-	ProcessLinks(doc, resultFile)
+	// Process the Links on the current page
+	ProcessLinks(doc)
 
 	// Recursively crawl the linked pages
-	links := ExtractLinks(doc)
-	for _, link := range links {
-		Crawl(link, depth-1, searchDir)
+	Links := ExtractLinks(doc)
+	for _, link := range Links {
+		Crawl(link, depth-1)
 	}
 }
 
-// ProcessLinks extracts and prints the links on the current page
-func ProcessLinks(n *html.Node, resultFile *os.File) {
+// ProcessLinks extracts and prints the Links on the current page
+func ProcessLinks(n *html.Node) {
 	if n.Type == html.ElementNode && n.Data == "a" {
 		for _, a := range n.Attr {
 			if a.Key == "href" {
-				link := fmt.Sprintf("Link: %s\n", a.Val)
-				resultFile.WriteString(link)
+				fmt.Println("Link:", a.Val)
 			}
 		}
 	}
 
-	for c := n.FirstChild; c != nil; c = c.NextSibling {
-		ProcessLinks(c, resultFile)
+	for C := n.FirstChild; C != nil; C = C.NextSibling {
+		ProcessLinks(C)
 	}
 }
 
-// ExtractLinks returns a slice of unique links from the HTML document
+// ExtractLinks returns a slice of unique Links from the HTML document
 func ExtractLinks(n *html.Node) []string {
-	var links []string
+	var Links []string
 	visited := make(map[string]bool)
 
 	var visitNode func(*html.Node)
@@ -93,18 +74,18 @@ func ExtractLinks(n *html.Node) []string {
 				if a.Key == "href" {
 					link := a.Val
 					if !visited[link] {
-						links = append(links, link)
+						Links = append(Links, link)
 						visited[link] = true
 					}
 				}
 			}
 		}
 
-		for c := n.FirstChild; c != nil; c = c.NextSibling {
-			visitNode(c)
+		for C := n.FirstChild; C != nil; C = C.NextSibling {
+			visitNode(C)
 		}
 	}
 
 	visitNode(n)
-	return links
-}
+	return Links
+}
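
For context, a minimal sketch of how the refactored Crawl might be driven after this change; the import path is assumed from the repository layout, and the URL is a placeholder:

	package main

	import (
		crawl "github.com/sinalalebakhsh/Gocron/Crawl"
	)

	func main() {
		// Depth 2: process the start page, then each page it links to.
		// Links are now printed to stdout instead of written to a per-search directory.
		crawl.Crawl("https://example.com", 2)
	}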
192 changes: 192 additions & 0 deletions GetUserInput/GetFirstArg.go

@@ -7,8 +7,14 @@ import (
 
 	"github.com/fatih/color"
 	"github.com/sinalalebakhsh/Gocron/features"
+
+	"net/http"
+	"strconv"
+
+	"golang.org/x/net/html"
 )
 
+
 func GetFirstArg() bool {
 
 	if len(os.Args) == 1 {
@@ -32,7 +38,193 @@ func GetFirstArg() bool {
 			color.HiBlue(fmt.Sprintln(features.OriginalFeatures))
 		}
 	}
+		if len(os.Args) >= 3 {
+			url := os.Args[1]
+			depthstr := os.Args[2]
+
+			depth, err := strconv.Atoi(depthstr)
+			if err != nil {
+				fmt.Println("Error converting depth to integer:", err)
+				return false
+			}
+
+			CrawlArgs(url, depth)
+		}
+
+
 	}
 
 	return true
 }
+
+var SliceOfSearching []string
+
+// CrawlArgs function takes a URL and recursively crawls the pages
+func CrawlArgs(url string, depth int) {
+	if depth <= 0 {
+		return
+	}
+
+	// Make an HTTP request
+	resp, err := http.Get(url)
+	if err != nil {
+		fmt.Println("Error making request:", err)
+		// storeObject := fmt.Sprint("Error making request:", err)
+		// SliceOfSearching = append(SliceOfSearching, storeObject)
+		return
+	}
+	defer resp.Body.Close()
+
+	// Parse the HTML content
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		fmt.Println("Error parsing HTML:", err)
+		// storeObject := fmt.Sprint("Error parsing HTML:", err)
+		// SliceOfSearching = append(SliceOfSearching, storeObject)
+		return
+	}
+
+	// Process the links on the current page
+	ProcessLinksArgs(doc)
+
+	// Recursively crawl the linked pages
+	links := ExtractLinksArgs(doc)
+	for _, link := range links {
+		CrawlArgs(link, depth-1)
+	}
+}
+
+// ProcessLinksArgs extracts and prints the links on the current page
+func ProcessLinksArgs(n *html.Node) {
+	if n.Type == html.ElementNode && n.Data == "a" {
+		for _, a := range n.Attr {
+			if a.Key == "href" {
+				fmt.Println("Link:", a.Val)
+				// storeObject := fmt.Sprint("Link:", a.Val)
+				// SliceOfSearching = append(SliceOfSearching, storeObject)
+			}
+		}
+	}
+
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		ProcessLinksArgs(c)
+	}
+}
+
+// ExtractLinksArgs returns a slice of unique links from the HTML document
+func ExtractLinksArgs(n *html.Node) []string {
+	var links []string
+	visited := make(map[string]bool)
+
+	var visitNode func(*html.Node)
+	visitNode = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "a" {
+			for _, a := range n.Attr {
+				if a.Key == "href" {
+					link := a.Val
+					if !visited[link] {
+						links = append(links, link)
+						visited[link] = true
+					}
+				}
+			}
+		}
+
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			visitNode(c)
+		}
+	}
+
+	visitNode(n)
+	return links
+}
+
+/*
+func RunStore(SliceOfSearching []string) {
+	// Step 1: Read current count from counter.txt or initialize to 0
+	count, err := readCounter()
+	if err != nil {
+		fmt.Println("Error reading counter:", err)
+		return
+	}
+	// Step 2: Create A{count}.txt
+	filename := fmt.Sprintf("Crawl/A%d.txt", count)
+	err = createFile(filename, SliceOfSearching)
+	if err != nil {
+		fmt.Println("Error creating file:", err)
+		return
+	}
+	// Step 3: Increment count and write it back to counter.txt
+	err = incrementAndWriteCounter(count + 1)
+	if err != nil {
+		fmt.Println("Error updating counter:", err)
+		return
+	}
+	fmt.Printf("Program completed successfully. Created %s\n", filename)
+}
+
+func readCounter() (int, error) {
+	// Read current count from counter.txt or initialize to 0
+	content, err := os.ReadFile("counter.txt")
+	if err != nil {
+		if os.IsNotExist(err) {
+			return 0, nil // File does not exist, starting from 0
+		}
+		return 0, err
+	}
+	// Parse the last line as an integer
+	lines := strings.Split(strings.TrimSpace(string(content)), "\n")
+	lastLine := lines[len(lines)-1]
+	count, err := strconv.Atoi(lastLine)
+	if err != nil {
+		return 0, err
+	}
+	return count, nil
+}
+
+func createFile(filename string, content []string) error {
+	// Create a new file with the given filename
+	file, err := os.Create(filename)
+	if err != nil {
+		return err
+	}
+	defer file.Close() // Close the file when the function completes
+	// Write the contents of the slice with line numbers to the file
+	for i, line := range content {
+		_, err := file.WriteString(fmt.Sprintf("%d: %s\n", i+1, line))
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func incrementAndWriteCounter(count int) error {
+	// Append the updated count to counter.txt
+	file, err := os.OpenFile("counter.txt", os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
+	if err != nil {
+		return err
+	}
+	defer file.Close() // Close the file when the function completes
+	// Write the incremented count to the file
+	_, err = file.WriteString(fmt.Sprintf("%d\n", count))
+	if err != nil {
+		return err
+	}
+	return nil
+}
+*/
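
A quick note on how this new argument path might be exercised; the exact command below is an assumption (any URL plus a numeric depth works the same way):

	go run . https://go.dev 2

GetFirstArg reads os.Args[1] as the start URL and os.Args[2] as the crawl depth, hands them to CrawlArgs, and CrawlArgs decrements the depth on each recursive call until it reaches zero.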
22 changes: 10 additions & 12 deletions GetUserInput/GetUserInput.go

@@ -78,16 +78,16 @@ func GetUserInput(GetBoleanFromGetFirstArgFunction bool) {
 
 	if len(SliceOfWords) >= 2 {
 		// FirstInput, SecondInput := SliceOfWords[0], SliceOfWords[1]
-		Regular = IfUsris2orMoreWords(SliceOfWords)
 	}
-	// if Regular {
-	// }
+	if Regular {
+		Regular = IfUsris2orMoreWords(SliceOfWords)
+	}
 
 	if len(SliceOfWords) >= 4 {
 		// fmt.Printf("%q", SliceOfWords)
-		Regular = IfUserInputIsCrawlURL(SliceOfWords)
+		if Regular {
+			Regular = IfUserInputIsCrawlURL(SliceOfWords)
+		}
 	}
 
-
-
 	if Regular {
 		Regular = IfUsrisALL(FinalInput)
 	}
@@ -531,17 +531,15 @@ func SaveToFile(sentence string) error {
 func IfUserInputIsCrawlURL(SliceOfWords []string) bool {
 	const CRAWL = "crawl"
 	var URL string
-	var depthStr string
 	if CRAWL == strings.ToLower(SliceOfWords[0]) {
 		if SliceOfWords[1] != "" {
 			URL = SliceOfWords[1]
-			depthStr = SliceOfWords[2]
+			depthStr := SliceOfWords[2]
 			depth, err := strconv.Atoi(depthStr)
 			if err != nil {
 				fmt.Println("Error converting depth to integer:", err)
 			} else {
-				searchDir := SliceOfWords[3]
-				crawl.Crawl(URL, depth, searchDir)
+				crawl.Crawl(URL, depth)
 				return false
 			}
 			return false
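
For reference, the command shape this function parses, as recorded in the SearchLogs entries below: a line such as

	crawl https://www.irna.ir/service/province/tehran 1 crawl

is split into SliceOfWords, where index 0 must be the word crawl (matched case-insensitively), index 1 the URL, and index 2 the depth. After this commit the fourth word (the former searchDir argument) is still needed to satisfy the len(SliceOfWords) >= 4 guard in GetUserInput, but its value is no longer used.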
1 change: 1 addition & 0 deletions SearchLogs/user_input.txt

@@ -37,3 +37,4 @@ crawl https://www.irna.ir/service/province/tehran 1 crawl
 crawl https://www.irna.ir/service/province/tehran 2 crawl
 crawl https://fa.wikipedia.org/wiki/%d8%a7%d8%b3%d8%aa%d8%a7%d9%86_%d8%aa%d9%87%d8%b1%d8%a7%d9%86 1 tehran
 crawl https://astaxie.gitbooks.io/build-web-application-with-golang/content/en/preface.html 1
+crawl https://astaxie.gitbooks.io/build-web-application-with-golang/content/en/preface.html 2
8 changes: 4 additions & 4 deletions features/RegEx.go

@@ -5,12 +5,12 @@ var TitleOfRegEx = []string{
 	"ALLREGEX",
 }
 
-var OriginalAllRegex = DataBase{
+var OriginalAllRegex = DataBase {
 	Alldatafield: `
 189.Regular Expressions
-The regular expressions used in this section perform basic matches, but the regexp package
-supports an extensive pattern syntax, which is described at https://pkg.go.dev/regexp/syntax@go1.17.1.
+The regular expressions used in this section perform basic matches, but the regexp package
+supports an extensive pattern syntax, which is described at https://pkg.go.dev/regexp/syntax@go1.17.1.
 
 The Basic Functions Provided by the regexp Package
 Function Description
@@ -726,5 +726,5 @@ var OriginalAllRegex = DataBase{
 	fmt.Println("String:", builder.String())
 }
-`,
+`,
 }
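
The embedded text above documents the basic functions of Go's regexp package; as a minimal, self-contained sketch of the kind of basic match it describes (pattern and input are illustrative, not from the commit):

	package main

	import (
		"fmt"
		"regexp"
	)

	func main() {
		// MatchString reports whether the string contains any match of the pattern.
		matched, err := regexp.MatchString("[A-z]oat", "Sales and Marketing will meet on the boat")
		if err != nil {
			fmt.Println("Error:", err)
			return
		}
		fmt.Println("Matched:", matched) // prints: Matched: true
	}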