package parser import ( "context" "errors" "fmt" "io" "log/slog" "net/http" "net/url" "recipes/internal/domain/models" "recipes/internal/lib/stringcv" "strconv" "strings" "sync" "github.com/PuerkitoBio/goquery" "github.com/google/uuid" "github.com/s32x/httpclient" ) const baseUrl string = "https://www.vsegdavkusno.ru" var client *httpclient.Client = httpclient.New().WithBaseURL(baseUrl) var PHPSESSID string var parseKey string var ( KeyNotFoundErr = errors.New("key not found") CookieNotFoundErr = errors.New("cookie not found") NotSuccessReqErr = errors.New("not success request") EmptyLinkErr = errors.New("empty link") RecipeExistsErr = errors.New("recipe already exists") FailUpdatePHPSESSID = errors.New("failed to update PHPSESSID") FailUpdateKEY = errors.New("failed to update KEY") ) type pictureSaver interface { SaveRecipeImage(ctx context.Context, imageFile io.Reader, filename string, contentType string, fileSize int64) error } type recipeSaver interface { AddRecipe(ctx context.Context, recipe models.Recipe) error } type recipeProvider interface { RecipeExists(ctx context.Context, title string) (bool, error) } // SaveAllPages saves all pages to storage. func SaveAllPages(log *slog.Logger, ps pictureSaver, rs recipeSaver, rp recipeProvider) error { const op = "parser.SaveAllPages" // get total log.Debug("Сохраняю страницу 1...") total, err := SavePage(log, 1, ps, rs, rp) if err != nil { return fmt.Errorf("%s: %w", op, err) } fmt.Println("Total =", total) for i := 2; i <= total; i++ { log.Debug(fmt.Sprintf("Сохраняю страницу %d...\n", i)) _, err = SavePage(log, i, ps, rs, rp) if err != nil { log.Error("Страница не сохранена", "err", fmt.Errorf("%s: %w", op, err)) continue } log.Debug(fmt.Sprintf("Страница %d сохранена\n", i)) } return nil } // SavePage saves page to storage. func SavePage(log *slog.Logger, page int, ps pictureSaver, rs recipeSaver, rp recipeProvider) (int, error) { const op = "parser.SavePage" var resp GetPageResp var body io.Reader for i := 0; i <= 3; i++ { // make form form := make(url.Values) form.Add("page", fmt.Sprint(page)) form.Add("action", "filter") form.Add("pageId", "7") form.Add("key", parseKey) // send request err := client. WithHeader("Cookie", fmt.Sprintf("PHPSESSID=%s", PHPSESSID)). WithHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"). Post("/assets/components/msearch2/action.php").WithForm(form).JSON(&resp) if err != nil { return 0, fmt.Errorf("%s: %w", op, err) } // update PHPSESSID and KEY if resp.Message == "Could not load config" { err = GetPHPSESSID(log) if err != nil { return 0, fmt.Errorf("%s: %w", op, FailUpdatePHPSESSID) } err = GetKey(log) if err != nil { return 0, fmt.Errorf("%s: %w", op, FailUpdateKEY) } continue } if !resp.Success { return 0, fmt.Errorf("%s: %w", op, NotSuccessReqErr) } body = strings.NewReader(resp.Data.Results) break } doc, err := goquery.NewDocumentFromReader(body) if err != nil { return 0, fmt.Errorf("%s: %w", op, err) } var recipes []models.Recipe doc.Find("div.recipe-card").Each(func(i int, s *goquery.Selection) { var recipe models.Recipe recipe.Title = s.Find("div.recipe-card__title").Text() // recipe.Time = strings.ReplaceAll(strings.ReplaceAll(s.Find("div.recipe-card__time").Text(), "\n", ""), " ", "") recipe.Image, _ = s.Find("img").Attr("src") recipe.Image = strings.Replace(recipe.Image, "image_366", "image_732", 1) recipe.Image = fmt.Sprintf("%s%s", baseUrl, recipe.Image) recipe.Link, _ = s.Find("a.recipe-card__link").Attr("href") recipe.Link = fmt.Sprintf("/%s", recipe.Link) recipes = append(recipes, recipe) }) var wg sync.WaitGroup wg.Add(len(recipes)) for i := 0; i < len(recipes); i++ { go func(i int, log *slog.Logger) { defer wg.Done() err = GetRecipe(&recipes[i], ps, rs, rp) if err != nil { if errors.Is(err, RecipeExistsErr) { log.Warn("Recipe already exists") return } log.Error("Failed to get recipe", "err", fmt.Errorf("%s: %w", op, err)) } }(i, log) } wg.Wait() return resp.Data.Pages, nil } // GetRecipe gets recipe info and saves recipe to storage. func GetRecipe(r *models.Recipe, ps pictureSaver, rs recipeSaver, rp recipeProvider) error { const op = "parser.GetRecipe" if r.Link == "" { return fmt.Errorf("%s: %w", op, EmptyLinkErr) } // send request body, err := client. WithHeader("Cookie", fmt.Sprintf("PHPSESSID=%s", PHPSESSID)). Post(r.Link).String() if err != nil { return fmt.Errorf("%s: %w", op, err) } bodyr := strings.NewReader(body) doc, err := goquery.NewDocumentFromReader(bodyr) if err != nil { return fmt.Errorf("%s: %w", op, err) } // описание r.Description = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(doc.Find("div.card-binfo__description").Text(), " ", ""), "\n", ""), "\t", "") // Время готовки, кол-во порций doc.Find("div.recipe-summary-items>div.recipe-summary-item").Each(func(i int, s *goquery.Selection) { label := s.Find("div.recipe-summary-item__label").Text() value := strings.ReplaceAll(s.Find("div.recipe-summary-item__value").Text(), "\n", "") switch label { case "Время приготовления:": r.CookingTime = value case "Количество порций:": count, _ := strconv.Atoi(value) r.ServingsNum = uint(count) case "Калорийность:": r.Calories = value } }) doc.Find("div.ingredients").Each(func(i int, s *goquery.Selection) { // ингридиенты для рецепта var ingredients models.RecipeIngredients ingredients.Title = s.Find("div.ingredients__title").Text() s.Find("ul.ingredients__list>li").Each(func(i int, q *goquery.Selection) { ingredient := strings.ReplaceAll(strings.ReplaceAll(q.Text(), " ", ""), "\n", "") ingredients.Ingredients = append(ingredients.Ingredients, ingredient) }) if len(ingredients.Ingredients) != 0 { r.Ingredients = append(r.Ingredients, ingredients) } // шаги рецепта var recipe_steps []string s.Find("div.recipe-rich>ol>li>span").Each(func(i int, q *goquery.Selection) { recipe_step := strings.ReplaceAll(strings.ReplaceAll(q.Text(), " ", ""), "\n", "") recipe_steps = append(recipe_steps, recipe_step) }) if len(recipe_steps) != 0 { r.Recipe_steps = recipe_steps } // рекомендации var advices []string s.Find("div.recipe-footer__additional-text>p").Each(func(i int, q *goquery.Selection) { if q.Find("br").Length() > 0 { html, _ := q.Html() // if err != nil { // return fmt.Errorf("%s: %w", op, err) // } advice_list := q.SetHtml(strings.Replace(html, "
", "\n", -1)).Text() advices_arr := strings.Split(advice_list, "\n") var advices_arr_res []string = make([]string, 0, len(advices_arr)) for _, a := range advices_arr { if a != "" { advices_arr_res = append(advices_arr_res, a) } } advices = append(advices, advices_arr_res...) } else { var advice string = q.Text() advices = append(advices, advice) } }) if len(advices) != 0 { // fmt.Printf("LEN ADVICES = %d", len(advices)) r.Advices = advices } }) // категории doc.Find("div.similar-items>a.similar-items__link").Each(func(i int, s *goquery.Selection) { r.Categories = append(r.Categories, s.Text()) }) // check recipe exists ex, err := rp.RecipeExists(context.Background(), r.Title) // interface! if err != nil || ex { return fmt.Errorf("%s: %w", op, RecipeExistsErr) } // save picture err = SaveRecipePicture(r, ps) if err != nil { return fmt.Errorf("%s: %w", op, err) } // insert recipe err = rs.AddRecipe(context.Background(), *r) if err != nil { return fmt.Errorf("%s: %w", op, err) } return nil } func SaveRecipePicture(r *models.Recipe, ps pictureSaver) error { const op = "parser.SaveRecipePicture" resp, err := http.Get(r.Image) if err != nil { return fmt.Errorf("%s: %w", op, err) } defer resp.Body.Close() content_len, _ := strconv.ParseInt(resp.Header["Content-Length"][0], 10, 64) // change name to generated uuid filename := stringcv.RenameFile(stringcv.GetFilenameFromUrl(r.Image), uuid.NewString()) // upload to storage err = ps.SaveRecipeImage(context.Background(), resp.Body, filename, resp.Header["Content-Type"][0], content_len) if err != nil { return err } // change to filename r.Image = filename return err } // GetKey gets func GetKey(log *slog.Logger) error { const op = "parser.GetKey" log.Debug("Updating KEY...") defer log.Debug("KEY updated") str, err := client.WithHeader("Cookie", fmt.Sprintf("PHPSESSID=%s", PHPSESSID)).Post("/recipes").String() if err != nil { return fmt.Errorf("%s: %w", op, err) } i := strings.Index(str, "\"key\":") if i != 0 { parseKey = str[i+7 : i+47] log.Debug("New KEY", "key", parseKey) return nil } return fmt.Errorf("%s: %w", op, KeyNotFoundErr) } func GetPHPSESSID(log *slog.Logger) error { const op = "parser.GetPHPSESSID" log.Debug("Updating PHPSESSID...") defer log.Debug("PHPSESSID updated") form := make(url.Values) form.Add("page", "1") form.Add("action", "filter") form.Add("pageId", "7") form.Add("key", "-") resp, err := client. WithHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"). WithHeader("Cookie", ""). Post("/assets/components/msearch2/action.php").WithForm(form).Do() if err != nil { return fmt.Errorf("%s: %w", op, err) } for _, c := range resp.Response().Cookies() { if c.Name == "PHPSESSID" { PHPSESSID = c.Value log.Debug("New PHPSESSID", "PHPSESSID", PHPSESSID) return nil } } return fmt.Errorf("%s: %w", op, CookieNotFoundErr) }