// Origin: recipes2/internal/parser/parser.go (311 lines, 9.1 KiB, Go)

package parser
import (
"errors"
"fmt"
"io"
"log/slog"
"net/url"
"recipes/internal/domain/models"
"strconv"
"strings"
"sync"
"github.com/PuerkitoBio/goquery"
"github.com/s32x/httpclient"
)
// baseUrl is the root address every request in this package is sent to.
const baseUrl string = "https://www.vsegdavkusno.ru"

// Package-level request state shared by all functions in this file.
var (
	// client is the HTTP client preconfigured with baseUrl.
	client *httpclient.Client = httpclient.New().WithBaseURL(baseUrl)

	// PHPSESSID is the session cookie value sent with requests; it is
	// overwritten by GetPHPSESSID.
	PHPSESSID string

	// parseKey is the value sent in the "key" form field; it is overwritten
	// by GetKey.
	parseKey string
)

// Sentinel errors returned (wrapped) by the functions of this package.
var (
	// KeyNotFoundErr reports that the key could not be located in a response.
	KeyNotFoundErr = errors.New("key not found")
	// CookieNotFoundErr reports that a response carried no PHPSESSID cookie.
	CookieNotFoundErr = errors.New("cookie not found")
	// NotSuccessReqErr reports that the site answered with success == false.
	NotSuccessReqErr = errors.New("not success request")
	// EmptyLinkErr reports that a recipe has no link to fetch.
	EmptyLinkErr = errors.New("empty link")
)
// SaveAllPages walks every listing page of the site and passes each one to
// SavePage.
//
// Page 1 is processed first because SavePage's return value for it is used
// as the total number of pages; a failure there aborts the run. A failure on
// any later page does not stop the crawl: the remaining pages are still
// processed and all failures are returned together (see errors.Join). The
// result is nil when every page was processed without error.
func SaveAllPages(log slog.Logger) error {
	const op = "parser.SaveAllPages"

	// The first page doubles as the source of the page count.
	log.Debug("Сохраняю страницу 1...")
	total, err := SavePage(log, 1)
	if err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}
	fmt.Println("Total =", total)

	var errs []error
	for i := 2; i <= total; i++ {
		log.Debug(fmt.Sprintf("Сохраняю страницу %d...\n", i))
		if _, err := SavePage(log, i); err != nil {
			// Remember the failure and move on; do not report the page as saved.
			errs = append(errs, fmt.Errorf("%s: page %d: %w", op, i, err))
			continue
		}
		log.Debug(fmt.Sprintf("Страница %d сохранена\n", i))
	}

	// errors.Join returns nil when errs is empty.
	return errors.Join(errs...)
}
// SavePage requests one listing page, extracts the recipe cards from the
// returned HTML fragment and runs GetRecipe for every card concurrently.
//
// It returns the page count reported in the response (resp.Data.Pages).
//
// The request is attempted up to four times: when the site answers
// "Could not load config" the session cookie and key are refreshed with
// GetPHPSESSID and GetKey and the request is repeated. An error is returned
// when the request fails, when a refresh fails, when the response is not
// successful, or when all attempts are used up.
//
// A failure of GetRecipe for a single card is logged and does not fail the
// page, so one bad card cannot abort the whole crawl.
func SavePage(log slog.Logger, page int) (int, error) {
	const op = "parser.SavePage"
	const maxAttempts = 4

	var (
		resp GetPageResp
		body io.Reader
	)
	for attempt := 0; attempt < maxAttempts; attempt++ {
		// Build the form for this attempt; parseKey may have been refreshed.
		form := make(url.Values)
		form.Add("page", strconv.Itoa(page))
		form.Add("action", "filter")
		form.Add("pageId", "7")
		form.Add("key", parseKey)

		// Decode into a clean value so fields from a previous attempt cannot
		// survive into this one.
		resp = GetPageResp{}
		err := client.
			WithHeader("Cookie", fmt.Sprintf("PHPSESSID=%s", PHPSESSID)).
			WithHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8").
			Post("/assets/components/msearch2/action.php").WithForm(form).JSON(&resp)
		if err != nil {
			return 0, fmt.Errorf("%s: %w", op, err)
		}

		// The site rejected the session/key pair: refresh both and retry.
		if resp.Message == "Could not load config" {
			if err := GetPHPSESSID(log); err != nil {
				return 0, fmt.Errorf("%s: %w", op, err)
			}
			if err := GetKey(log); err != nil {
				return 0, fmt.Errorf("%s: %w", op, err)
			}
			continue
		}
		if !resp.Success {
			return 0, fmt.Errorf("%s: %w", op, NotSuccessReqErr)
		}

		body = strings.NewReader(resp.Data.Results)
		break
	}
	// Every attempt ended in a refresh: there is no HTML to parse.
	if body == nil {
		return 0, fmt.Errorf("%s: no usable response after %d attempts: %w", op, maxAttempts, NotSuccessReqErr)
	}

	doc, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		return 0, fmt.Errorf("%s: %w", op, err)
	}

	var recipes []models.Recipe
	doc.Find("div.recipe-card").Each(func(_ int, s *goquery.Selection) {
		var recipe models.Recipe
		recipe.Title = s.Find("div.recipe-card__title").Text()

		// A missing src attribute yields an empty path, exactly as before.
		src, _ := s.Find("img").Attr("src")
		// Swap the "image_366" path segment for "image_732"
		// (presumably a larger rendition of the picture — confirm).
		recipe.Image = baseUrl + strings.Replace(src, "image_366", "image_732", 1)

		// Only build a link when the card actually has one; otherwise Link
		// stays empty and GetRecipe rejects the card with EmptyLinkErr.
		if href, ok := s.Find("a.recipe-card__link").Attr("href"); ok && href != "" {
			recipe.Link = "/" + href
		}

		recipes = append(recipes, recipe)
	})

	// NOTE(review): the GetRecipe calls below run concurrently and all use the
	// package-level client — confirm it is safe for concurrent use.
	var wg sync.WaitGroup
	wg.Add(len(recipes))
	for i := range recipes {
		go func(i int) {
			defer wg.Done()
			// Each goroutine keeps its own error; nothing shared is written.
			if err := GetRecipe(&recipes[i]); err != nil {
				log.Warn("recipe skipped", "op", op, "link", recipes[i].Link, "error", err)
			}
		}(i)
	}
	wg.Wait()

	return resp.Data.Pages, nil
}
// GetRecipe downloads the page behind r.Link, fills r with the values
// scraped from it, saves the recipe picture and inserts the recipe into the
// database.
//
// It returns an error (prefixed with "parser.GetRecipe") when r.Link is
// empty (EmptyLinkErr), when the page cannot be fetched or parsed, when the
// existence check fails, when a recipe with the same title already exists,
// when the picture cannot be saved, or when the insert fails. It returns nil
// once the recipe has been inserted.
func GetRecipe(r *models.Recipe) error {
	const op = "parser.GetRecipe"

	if r.Link == "" {
		return fmt.Errorf("%s: %w", op, EmptyLinkErr)
	}

	// Fetch the recipe page.
	body, err := client.
		WithHeader("Cookie", fmt.Sprintf("PHPSESSID=%s", PHPSESSID)).
		Post(r.Link).String()
	if err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}

	// Description. Whitespace runs are collapsed to single spaces; the
	// previous code removed every space and glued the words together.
	r.Description = collapseWhitespace(doc.Find("div.card-binfo__description").Text())

	// Cooking time, number of servings, calories.
	doc.Find("div.recipe-summary-items>div.recipe-summary-item").Each(func(_ int, s *goquery.Selection) {
		label := s.Find("div.recipe-summary-item__label").Text()
		value := strings.ReplaceAll(s.Find("div.recipe-summary-item__value").Text(), "\n", "")
		switch label {
		case "Время приготовления:":
			r.CookingTime = value
		case "Количество порций:":
			// An unparsable or negative count is stored as 0 instead of
			// wrapping around when converted to uint.
			count, err := strconv.Atoi(strings.TrimSpace(value))
			if err != nil || count < 0 {
				count = 0
			}
			r.ServingsNum = uint(count)
		case "Калорийность:":
			r.Calories = value
		}
	})

	doc.Find("div.ingredients").Each(func(_ int, s *goquery.Selection) {
		// Ingredients of this group.
		var ingredients models.RecipeIngredients
		ingredients.Title = s.Find("div.ingredients__title").Text()
		s.Find("ul.ingredients__list>li").Each(func(_ int, q *goquery.Selection) {
			ingredients.Ingredients = append(ingredients.Ingredients, collapseWhitespace(q.Text()))
		})
		if len(ingredients.Ingredients) != 0 {
			r.Ingredients = append(r.Ingredients, ingredients)
		}

		// Recipe steps.
		var recipeSteps []string
		s.Find("div.recipe-rich>ol>li>span").Each(func(_ int, q *goquery.Selection) {
			recipeSteps = append(recipeSteps, collapseWhitespace(q.Text()))
		})
		if len(recipeSteps) != 0 {
			r.Recipe_steps = recipeSteps
		}

		// Advice paragraphs. A paragraph that contains <br> is split into one
		// advice per line; any other paragraph is taken as a single advice.
		var advices []string
		s.Find("div.recipe-footer__additional-text>p").Each(func(_ int, q *goquery.Selection) {
			if q.Find("br").Length() == 0 {
				advices = append(advices, q.Text())
				return
			}
			html, err := q.Html()
			if err != nil {
				// The paragraph could not be rendered; keep its plain text
				// rather than losing the advice.
				advices = append(advices, q.Text())
				return
			}
			text := q.SetHtml(strings.ReplaceAll(html, "<br/>", "\n")).Text()
			for _, line := range strings.Split(text, "\n") {
				// Skip empty and whitespace-only lines.
				if line = strings.TrimSpace(line); line != "" {
					advices = append(advices, line)
				}
			}
		})
		if len(advices) != 0 {
			r.Advices = advices
		}
	})

	// Categories.
	doc.Find("div.similar-items>a.similar-items__link").Each(func(_ int, s *goquery.Selection) {
		r.Categories = append(r.Categories, s.Text())
	})

	// Check whether the recipe is already stored. A failed check and an
	// existing recipe are reported separately so the real cause is kept.
	// TODO(review): the original note here reads "interface!" — presumably
	// storage should be reached through an interface rather than postgres.DB.
	exists, err := postgres.DB.RecipeExists(r.Title)
	if err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}
	if exists {
		return fmt.Errorf("%s: %w", op, errors.New("recipe already exists"))
	}

	// Save the picture; do not insert a recipe whose picture failed to save.
	if err := r.SaveRecipePicture(); err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}

	// Copy the scraped fields into the value handed to the database.
	finalRecipe := models.Recipe{
		Title:        r.Title,
		Description:  r.Description,
		Image:        r.Image,
		CookingTime:  r.CookingTime,
		Link:         r.Link,
		ServingsNum:  r.ServingsNum,
		Calories:     r.Calories,
		Ingredients:  r.Ingredients,
		Recipe_steps: r.Recipe_steps,
		Advices:      r.Advices,
		Categories:   r.Categories,
	}

	// Insert the recipe; only a real failure is returned.
	if err := postgres.DB.AddRecipe(finalRecipe); err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}
	return nil
}

// collapseWhitespace trims s and replaces every run of whitespace (spaces,
// tabs, newlines) with a single space.
func collapseWhitespace(s string) string {
	return strings.Join(strings.Fields(s), " ")
}
// func (r *Recipe) SaveRecipePicture() error {
// resp, err := http.Get(r.Image)
// if err != nil {
// return err
// }
// defer resp.Body.Close()
// content_len, _ := strconv.ParseInt(resp.Header["Content-Length"][0], 10, 64)
// // change name to generated uuid
// filename := renamefile(getFilenameFromUrl(r.Image), uuid.NewString())
// // upload to minio
// err = cminio.UploadFile(cminio.RecipeImg, filename, resp.Body, resp.Header["Content-Type"][0], content_len)
// // change to filename
// r.Image = filename
// return err
// }
// // url to filename
// func getFilenameFromUrl(url string) string {
// url_els := strings.Split(url, "/")
// return url_els[len(url_els)-1]
// }
// // change file name
// func renamefile(old_filename, new_name string) string {
// old_file_els := strings.Split(old_filename, ".")
// return fmt.Sprintf("%s.%s", new_name, old_file_els[len(old_file_els)-1])
// }
// GetKey requests the "/recipes" page, extracts the key that follows the
// `"key":` marker in the response body and stores it in parseKey.
//
// It returns a wrapped request error when the page cannot be fetched, and a
// wrapped KeyNotFoundErr when the marker is absent or the body ends before a
// complete key. parseKey is left unchanged on failure.
func GetKey(log slog.Logger) error {
	const (
		op     = "parser.GetKey"
		marker = "\"key\":"
		// valueOffset skips the marker plus one more character
		// (presumably the opening quote of the value — confirm).
		valueOffset = len(marker) + 1
		// keyLen is the number of characters taken as the key.
		keyLen = 40
	)

	log.Debug("Updating KEY...")

	str, err := client.WithHeader("Cookie", fmt.Sprintf("PHPSESSID=%s", PHPSESSID)).Post("/recipes").String()
	if err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}

	// strings.Index returns -1 when the marker is missing; also make sure the
	// whole key lies inside the body before slicing.
	i := strings.Index(str, marker)
	if i < 0 || len(str) < i+valueOffset+keyLen {
		return fmt.Errorf("%s: %w", op, KeyNotFoundErr)
	}

	parseKey = str[i+valueOffset : i+valueOffset+keyLen]
	log.Debug("New KEY", "key", parseKey)
	// Reported only on success.
	log.Debug("KEY updated")
	return nil
}
// GetPHPSESSID posts a filter request with an empty Cookie header and stores
// the value of the PHPSESSID cookie from the response in the package-level
// PHPSESSID variable.
//
// It returns a wrapped request error when the request fails and a wrapped
// CookieNotFoundErr when the response carries no PHPSESSID cookie. PHPSESSID
// is left unchanged on failure.
func GetPHPSESSID(log slog.Logger) error {
	const op = "parser.GetPHPSESSID"

	log.Debug("Updating PHPSESSID...")

	// Same form as a regular page request, with "-" sent as the key.
	form := make(url.Values)
	form.Add("page", "1")
	form.Add("action", "filter")
	form.Add("pageId", "7")
	form.Add("key", "-")

	// The Cookie header is sent empty so no existing session is attached
	// (presumably this makes the server issue a new one — confirm).
	resp, err := client.
		WithHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8").
		WithHeader("Cookie", "").
		Post("/assets/components/msearch2/action.php").WithForm(form).Do()
	if err != nil {
		return fmt.Errorf("%s: %w", op, err)
	}
	// Only the cookies are needed; release the body so it is not left open.
	defer resp.Response().Body.Close()

	for _, c := range resp.Response().Cookies() {
		if c.Name != "PHPSESSID" {
			continue
		}
		PHPSESSID = c.Value
		log.Debug("New PHPSESSID", "phpsessid", PHPSESSID)
		// Reported only on success.
		log.Debug("PHPSESSID updated")
		return nil
	}

	return fmt.Errorf("%s: %w", op, CookieNotFoundErr)
}